Intrinsics: expand semantics of LLVMExtendedVectorType (& trunc)
include/llvm/IR/IntrinsicsAArch64.td
//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AArch64-specific intrinsics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)

let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

// Vector Absolute Compare (Floating Point)
def int_aarch64_neon_vacgeq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vacgtq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;

// Vector saturating accumulate
def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;

// Vector Bitwise reverse
def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;

// Vector extract and narrow
def int_aarch64_neon_xtn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector floating-point convert
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
def int_aarch64_neon_vcvtxn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtzs :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtzu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;

// Vector minNum (Floating Point)
def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;

// Vector Pairwise maxNum (Floating Point)
def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;

// Vector Pairwise minNum (Floating Point)
def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;

// Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
def int_aarch64_neon_vmulx  :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;

class Neon_N2V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N3V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N2V_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMExtendedType<0>, llvm_i32_ty],
              [IntrNoMem]>;

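// LLVMExtendedType<0> ties the first operand to the double-width counterpart
// of the overloaded result type, so a narrowing shift that produces <8 x i8>
// consumes an <8 x i16> source.  An illustrative IR call, assuming the usual
// overloaded-name mangling, might look like:
//   %r = call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
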
// Vector rounding shift right by immediate (signed and unsigned), and vector
// signed saturating shift left unsigned by immediate
def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;

def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;

def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;

// Vector across
class Neon_Across_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_smaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_umaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_sminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_uminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vaddv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxv :
  Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vminv :
  Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vmaxnmv :
  Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vminnmv :
  Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;

// Vector Table Lookup.
def int_aarch64_neon_vtbl1 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl2 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbl3 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl4 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
            llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;

// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument.  The next 1-4
// arguments after that are the table.
def int_aarch64_neon_vtbx1 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
            [IntrNoMem]>;
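// Illustrative sketch, assuming the usual overloaded-name mangling: an
// out-of-range index leaves the corresponding destination element unchanged,
// which is why %dst is threaded through as the first operand.
//   %r = call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %dst,
//                                                    <16 x i8> %tbl,
//                                                    <8 x i8> %idx)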

def int_aarch64_neon_vtbx2 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
             LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx3 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
             llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx4 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
             llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
            [IntrNoMem]>;

// Vector Load/store
def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;

def int_aarch64_neon_vst1x2 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x3 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x4 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;
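// Illustrative sketch of a multi-vector load, assuming the usual overloaded
// mangling and assuming the trailing i32 is the alignment (as with the ARM
// NEON load/store intrinsics); the two loaded registers come back as a struct:
//   %pair = call { <4 x i32>, <4 x i32> }
//           @llvm.aarch64.neon.vld1x2.v4i32(i8* %addr, i32 4)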

// Scalar Add
def int_aarch64_neon_vaddds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vadddu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;


// Scalar Sub
def int_aarch64_neon_vsubds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vsubdu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;


// Scalar Shift
// Scalar Shift Left
def int_aarch64_neon_vshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Shift Left
def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;

// Scalar Rounding Shift Left
def int_aarch64_neon_vrshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vrshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Rounding Shift Left
def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;

// Scalar Reduce Pairwise Add.
def int_aarch64_neon_vpadd :
  Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfadd :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Max/Min.
def int_aarch64_neon_vpmax :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
def int_aarch64_neon_vpfmaxnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Signed Integer Convert To Floating-point
def int_aarch64_neon_vcvtint2fps :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Unsigned Integer Convert To Floating-point
def int_aarch64_neon_vcvtint2fpu :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Floating-point Convert
def int_aarch64_neon_fcvtxn :
  Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtns :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtnu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtps :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtpu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtms :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtmu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtas :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtau :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzs :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Estimate.
def int_aarch64_neon_vrecpe :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Exponent
def int_aarch64_neon_vrecpx :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Square Root Estimate
def int_aarch64_neon_vrsqrte :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Step
def int_aarch64_neon_vrecps :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem]>;

// Scalar Floating-point Reciprocal Square Root Step
def int_aarch64_neon_vrsqrts :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem]>;

// Compare with vector operands.
class Neon_Cmp_Intrinsic :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
            [IntrNoMem]>;

// Floating-point compare with scalar operands.
class Neon_Float_Cmp_Intrinsic :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_anyfloat_ty],
            [IntrNoMem]>;
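// Illustrative sketch, assuming the usual overloaded-name mangling: these
// comparisons return a scalar mask vector (all-ones for true, all-zeros for
// false), so a 64-bit integer compare-equal might be written as
//   %m = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(
//            <1 x i64> %a, <1 x i64> %b)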

// Scalar Compare Equal
def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fceq : Neon_Float_Cmp_Intrinsic;

// Scalar Compare Greater-Than or Equal
def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fcge : Neon_Float_Cmp_Intrinsic;
def int_aarch64_neon_fchs : Neon_Float_Cmp_Intrinsic;

// Scalar Compare Less-Than or Equal
def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fclez : Neon_Float_Cmp_Intrinsic;

// Scalar Compare Less-Than
def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fcltz : Neon_Float_Cmp_Intrinsic;

// Scalar Compare Greater-Than
def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fcgt : Neon_Float_Cmp_Intrinsic;
def int_aarch64_neon_fchi : Neon_Float_Cmp_Intrinsic;

// Scalar Compare Bitwise Test Bits
def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than Or Equal
def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fcage : Neon_Float_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than
def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;
def int_aarch64_neon_fcagt : Neon_Float_Cmp_Intrinsic;

// Scalar Signed Saturating Accumulate of Unsigned Value
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;

// Scalar Unsigned Saturating Accumulate of Signed Value
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;

// Scalar Absolute Value
def int_aarch64_neon_vabs :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Absolute Difference
def int_aarch64_neon_vabd :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem]>;

// Scalar Negate Value
def int_aarch64_neon_vneg :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Signed Saturating Doubling Multiply-Add Long
def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply-Subtract Long
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;

def int_aarch64_neon_vmull_p64 :
  Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

class Neon_2Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

class Neon_3Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
              [IntrNoMem]>;

// Scalar Shift Right (Immediate)
def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Rounding Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Shift Left (Immediate)
def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Saturating Shift Left (Immediate)
def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;

// Scalar Signed Saturating Shift Left Unsigned (Immediate)
def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtfxs2fp_n :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtfxu2fp_n :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
def int_aarch64_neon_vcvtfp2fxs_n :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
def int_aarch64_neon_vcvtfp2fxu_n :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;

}