1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
// Placeholder instruction: no outputs, no inputs, an empty assembly string
// and no selection pattern.
17 def NOP : NVPTXInst<(outs), (ins), "", []>;
19 // List of vector specific properties
// Each def instantiates VecInstTypeEnum with a distinct numeric tag: 1-7 for
// the specific kinds and 15 for isVecOther. VecInstTypeEnum itself is declared
// outside this chunk (presumably in NVPTXInstrFormats.td -- TODO confirm).
20 def isVecLD : VecInstTypeEnum<1>;
21 def isVecST : VecInstTypeEnum<2>;
22 def isVecBuild : VecInstTypeEnum<3>;
23 def isVecShuffle : VecInstTypeEnum<4>;
24 def isVecExtract : VecInstTypeEnum<5>;
25 def isVecInsert : VecInstTypeEnum<6>;
26 def isVecDest : VecInstTypeEnum<7>;
27 def isVecOther : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
// Operand of value type OtherVT. By its name this is the operand kind used
// for branch destinations -- NOTE(review): confirm against the branch
// instruction definitions, which are outside this chunk.
33 def brtarget : Operand<OtherVT>;
35 // CVT conversion modes
36 // These must match the enum in NVPTX.h
// Encoding as visible here: values 0x0-0x8 select the base mode, each *_FTZ
// leaf is the corresponding base value with bit 0x10 set, CvtSAT is 0x20 and
// CvtSAT_FTZ is 0x30 (0x20 | 0x10). Each leaf is an i32 constant suitable for
// passing as a CvtMode operand.
37 def CvtNONE : PatLeaf<(i32 0x0)>;
38 def CvtRNI : PatLeaf<(i32 0x1)>;
39 def CvtRZI : PatLeaf<(i32 0x2)>;
40 def CvtRMI : PatLeaf<(i32 0x3)>;
41 def CvtRPI : PatLeaf<(i32 0x4)>;
42 def CvtRN : PatLeaf<(i32 0x5)>;
43 def CvtRZ : PatLeaf<(i32 0x6)>;
44 def CvtRM : PatLeaf<(i32 0x7)>;
45 def CvtRP : PatLeaf<(i32 0x8)>;
// Same base modes with the 0x10 (flush-to-zero) bit set.
47 def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
48 def CvtRNI_FTZ : PatLeaf<(i32 0x11)>;
49 def CvtRZI_FTZ : PatLeaf<(i32 0x12)>;
50 def CvtRMI_FTZ : PatLeaf<(i32 0x13)>;
51 def CvtRPI_FTZ : PatLeaf<(i32 0x14)>;
52 def CvtRN_FTZ : PatLeaf<(i32 0x15)>;
53 def CvtRZ_FTZ : PatLeaf<(i32 0x16)>;
54 def CvtRM_FTZ : PatLeaf<(i32 0x17)>;
55 def CvtRP_FTZ : PatLeaf<(i32 0x18)>;
// Saturating variants: 0x20 alone, and 0x20 combined with the 0x10 bit.
57 def CvtSAT : PatLeaf<(i32 0x20)>;
58 def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;
60 def CvtMode : Operand<i32> {
61 let PrintMethod = "printCvtMode";
// Comparison-mode constants, each an i32 leaf.
65 // These must match the enum in NVPTX.h
// Base modes occupy 0-17; every *_FTZ leaf below is the matching base value
// plus 0x100.
66 def CmpEQ : PatLeaf<(i32 0)>;
67 def CmpNE : PatLeaf<(i32 1)>;
68 def CmpLT : PatLeaf<(i32 2)>;
69 def CmpLE : PatLeaf<(i32 3)>;
70 def CmpGT : PatLeaf<(i32 4)>;
71 def CmpGE : PatLeaf<(i32 5)>;
72 def CmpLO : PatLeaf<(i32 6)>;
73 def CmpLS : PatLeaf<(i32 7)>;
74 def CmpHI : PatLeaf<(i32 8)>;
75 def CmpHS : PatLeaf<(i32 9)>;
76 def CmpEQU : PatLeaf<(i32 10)>;
77 def CmpNEU : PatLeaf<(i32 11)>;
78 def CmpLTU : PatLeaf<(i32 12)>;
79 def CmpLEU : PatLeaf<(i32 13)>;
80 def CmpGTU : PatLeaf<(i32 14)>;
81 def CmpGEU : PatLeaf<(i32 15)>;
82 def CmpNUM : PatLeaf<(i32 16)>;
83 def CmpNAN : PatLeaf<(i32 17)>;
// Same modes with the 0x100 (flush-to-zero) flag added.
85 def CmpEQ_FTZ : PatLeaf<(i32 0x100)>;
86 def CmpNE_FTZ : PatLeaf<(i32 0x101)>;
87 def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
88 def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
89 def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
90 def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
91 def CmpLO_FTZ : PatLeaf<(i32 0x106)>;
92 def CmpLS_FTZ : PatLeaf<(i32 0x107)>;
93 def CmpHI_FTZ : PatLeaf<(i32 0x108)>;
94 def CmpHS_FTZ : PatLeaf<(i32 0x109)>;
95 def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
96 def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
97 def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
98 def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
99 def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
100 def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
101 def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
102 def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;
104 def CmpMode : Operand<i32> {
105 let PrintMethod = "printCmpMode";
108 def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
109 return CurDAG->getTargetConstantFP(0.0, MVT::f32);
111 def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
112 return CurDAG->getTargetConstantFP(1.0, MVT::f32);
115 //===----------------------------------------------------------------------===//
116 // NVPTX Instruction Predicate Definitions
117 //===----------------------------------------------------------------------===//
// Each Predicate wraps a C++ expression string that is evaluated where the
// generated instruction selector checks pattern predicates. The first group
// queries the Subtarget object for a feature.
120 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
121 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
122 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
// True only when the generic 32-bit form is unavailable but the global one is.
123 def useAtomRedG32forGen32 :
124 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
125 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
126 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
127 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
128 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
// 64-bit counterpart of useAtomRedG32forGen32.
129 def useAtomRedG64forGen64 :
130 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
131 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
132 def hasVote : Predicate<"Subtarget.hasVote()">;
133 def hasDouble : Predicate<"Subtarget.hasDouble()">;
134 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
135 def hasLDG : Predicate<"Subtarget.hasLDG()">;
136 def hasLDU : Predicate<"Subtarget.hasLDU()">;
137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
// f32 flush-to-zero selection: the two predicates are exact complements.
139 def doF32FTZ : Predicate<"useF32FTZ()">;
140 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
// The predicates below test bare names (doFMAF32, doMulWide, allowFMA, ...)
// that must be visible in the selector's scope; they are defined outside this
// file. The *_ftz forms additionally require useF32FTZ().
142 def doFMAF32 : Predicate<"doFMAF32">;
143 def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">;
144 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
145 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">;
146 def doFMAF64 : Predicate<"doFMAF64">;
147 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
149 def doMulWide : Predicate<"doMulWide">;
151 def allowFMA : Predicate<"allowFMA">;
152 def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">;
// f32 division level: 0 selects the "APPROX" predicate, 1 the "FULL" one.
// Any other level satisfies neither.
154 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
// f32 square-root precision: complementary pair keyed on usePrecSqrtF32().
157 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
// Complementary pair keyed on Subtarget.hasHWROT32().
160 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
161 def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
// Always-satisfied predicate (the expression is the constant 1); useful where
// a multiclass demands a Predicate argument but no gating is wanted.
163 def true : Predicate<"1">;
// Satisfied when the subtarget reports a PTX version of at least 31.
165 def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
168 //===----------------------------------------------------------------------===//
169 // Some Common Instruction Class Templates
170 //===----------------------------------------------------------------------===//
172 multiclass I3<string OpcStr, SDNode OpNode> {
173 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
174 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
175 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
177 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
178 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
179 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
180 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
181 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
182 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
184 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
185 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
186 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
187 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
188 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
189 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
191 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
192 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
193 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
196 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
197 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
199 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
200 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
202 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
203 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
204 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
// Binary floating-point operation template.
//   OpcStr - mnemonic stem; the type suffix (".f64", ".ftz.f32", ".f32") and
//            the operand list are appended here.
//   OpNode - the two-operand SDNode each record matches.
// Produces six records: f64rr/f64ri, f32rr_ftz/f32ri_ftz and f32rr/f32ri
// ("rr" = register/register, "ri" = register/immediate). Every record is gated
// on allowFMA, or allowFMA_ftz for the ".ftz" forms.
// NOTE(review): the closing brace of this multiclass is not visible in this
// chunk.
207 multiclass F3<string OpcStr, SDNode OpNode> {
208 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
209 (ins Float64Regs:$a, Float64Regs:$b),
210 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
211 [(set Float64Regs:$dst,
212 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
213 Requires<[allowFMA]>;
214 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
215 (ins Float64Regs:$a, f64imm:$b),
216 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
217 [(set Float64Regs:$dst,
218 (OpNode Float64Regs:$a, fpimm:$b))]>,
219 Requires<[allowFMA]>;
// f32 forms that print the ".ftz" qualifier; need allowFMA and useF32FTZ().
220 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
221 (ins Float32Regs:$a, Float32Regs:$b),
222 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
223 [(set Float32Regs:$dst,
224 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
225 Requires<[allowFMA_ftz]>;
226 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
227 (ins Float32Regs:$a, f32imm:$b),
228 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
229 [(set Float32Regs:$dst,
230 (OpNode Float32Regs:$a, fpimm:$b))]>,
231 Requires<[allowFMA_ftz]>;
// Plain f32 forms.
232 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
233 (ins Float32Regs:$a, Float32Regs:$b),
234 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
235 [(set Float32Regs:$dst,
236 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
237 Requires<[allowFMA]>;
238 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
239 (ins Float32Regs:$a, f32imm:$b),
240 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
241 [(set Float32Regs:$dst,
242 (OpNode Float32Regs:$a, fpimm:$b))]>,
243 Requires<[allowFMA]>;
// Binary floating-point operation template that always prints the ".rn"
// qualifier after the mnemonic stem.
//   OpcStr - mnemonic stem (e.g. "add"); ".rn" plus the type suffix is appended.
//   OpNode - the two-operand SDNode each record matches.
// Produces the same six record names as F3. Only the ".ftz" forms carry a
// predicate (doF32FTZ); the f64 and plain f32 forms are unconditional.
// NOTE(review): the closing brace of this multiclass is not visible in this
// chunk.
246 multiclass F3_rn<string OpcStr, SDNode OpNode> {
247 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
248 (ins Float64Regs:$a, Float64Regs:$b),
249 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
250 [(set Float64Regs:$dst,
251 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
252 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
253 (ins Float64Regs:$a, f64imm:$b),
254 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
255 [(set Float64Regs:$dst,
256 (OpNode Float64Regs:$a, fpimm:$b))]>;
// Flush-to-zero f32 forms, selectable only when useF32FTZ() holds.
257 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
258 (ins Float32Regs:$a, Float32Regs:$b),
259 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
260 [(set Float32Regs:$dst,
261 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
262 Requires<[doF32FTZ]>;
263 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
264 (ins Float32Regs:$a, f32imm:$b),
265 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
266 [(set Float32Regs:$dst,
267 (OpNode Float32Regs:$a, fpimm:$b))]>,
268 Requires<[doF32FTZ]>;
// Plain f32 forms, no predicate.
269 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
270 (ins Float32Regs:$a, Float32Regs:$b),
271 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
272 [(set Float32Regs:$dst,
273 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
274 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
275 (ins Float32Regs:$a, f32imm:$b),
276 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
277 [(set Float32Regs:$dst,
278 (OpNode Float32Regs:$a, fpimm:$b))]>;
// Unary floating-point operation template.
//   OpcStr - mnemonic stem; the type suffix and operand list are appended.
//   OpNode - the one-operand SDNode each record matches.
// Produces f64, f32_ftz (gated on doF32FTZ, prints ".ftz") and f32.
// NOTE(review): the closing brace of this multiclass is not visible in this
// chunk.
281 multiclass F2<string OpcStr, SDNode OpNode> {
282 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
283 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
284 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
285 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
286 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
287 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
288 Requires<[doF32FTZ]>;
289 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
290 !strconcat(OpcStr, ".f32 \t$dst, $a;"),
291 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
294 //===----------------------------------------------------------------------===//
295 // NVPTX Instructions.
296 //===----------------------------------------------------------------------===//
298 //-----------------------------------
299 // General Type Conversion
300 //-----------------------------------
302 let neverHasSideEffects = 1 in {
303 // Generate a cvt to the given type from all possible types.
304 // Each instance takes a CvtMode immediate that defines the conversion mode to
305 // use. It can be CvtNONE to omit a conversion mode.
306 multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
307 def _s16 : NVPTXInst<(outs RC:$dst),
308 (ins Int16Regs:$src, CvtMode:$mode),
309 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
310 FromName, ".s16\t$dst, $src;"),
312 def _u16 : NVPTXInst<(outs RC:$dst),
313 (ins Int16Regs:$src, CvtMode:$mode),
314 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
315 FromName, ".u16\t$dst, $src;"),
317 def _f16 : NVPTXInst<(outs RC:$dst),
318 (ins Int16Regs:$src, CvtMode:$mode),
319 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
320 FromName, ".f16\t$dst, $src;"),
322 def _s32 : NVPTXInst<(outs RC:$dst),
323 (ins Int32Regs:$src, CvtMode:$mode),
324 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
325 FromName, ".s32\t$dst, $src;"),
327 def _u32 : NVPTXInst<(outs RC:$dst),
328 (ins Int32Regs:$src, CvtMode:$mode),
329 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
330 FromName, ".u32\t$dst, $src;"),
332 def _s64 : NVPTXInst<(outs RC:$dst),
333 (ins Int64Regs:$src, CvtMode:$mode),
334 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
335 FromName, ".s64\t$dst, $src;"),
337 def _u64 : NVPTXInst<(outs RC:$dst),
338 (ins Int64Regs:$src, CvtMode:$mode),
339 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
340 FromName, ".u64\t$dst, $src;"),
342 def _f32 : NVPTXInst<(outs RC:$dst),
343 (ins Float32Regs:$src, CvtMode:$mode),
344 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
345 FromName, ".f32\t$dst, $src;"),
347 def _f64 : NVPTXInst<(outs RC:$dst),
348 (ins Float64Regs:$src, CvtMode:$mode),
349 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
350 FromName, ".f64\t$dst, $src;"),
354 // Generate a cvt to all possible types.
// One CVT_FROM_ALL instantiation per destination type. The string argument is
// the destination type name printed in the mnemonic and the register class is
// the destination class; f16 results are held in Int16Regs. Record names are
// formed as CVT_<dst>_<src>, e.g. CVT_f32_f32.
355 defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
356 defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
357 defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
358 defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
359 defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
360 defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
361 defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
362 defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
363 defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
365 // This set of cvt is different from the above. The type of the source
366 // and target are the same.
// That is, source and destination use the same register class, while the
// mnemonic names a narrower signed source type (s8/s16/s32). None of these
// records has a selection pattern, so they must be emitted explicitly by
// C++ code or by Pat records elsewhere.
368 def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
369 "cvt.s16.s8 \t$dst, $src;", []>;
370 def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
371 "cvt.s32.s8 \t$dst, $src;", []>;
372 def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
373 "cvt.s32.s16 \t$dst, $src;", []>;
374 def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
375 "cvt.s64.s8 \t$dst, $src;", []>;
376 def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
377 "cvt.s64.s16 \t$dst, $src;", []>;
378 def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
379 "cvt.s64.s32 \t$dst, $src;", []>;
382 //-----------------------------------
383 // Integer Arithmetic
384 //-----------------------------------
// Template for 1-bit add/sub on predicate registers.
//   OpNode - the SDNode to match.
// Both the register/register (_rr) and register/immediate (_ri) records print
// "xor.pred" regardless of OpNode: on a single bit, addition and subtraction
// modulo 2 are both exclusive-or.
// NOTE(review): the closing brace of this multiclass is not visible in this
// chunk.
386 multiclass ADD_SUB_i1<SDNode OpNode> {
387 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
388 "xor.pred \t$dst, $a, $b;",
389 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
390 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
391 "xor.pred \t$dst, $a, $b;",
392 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
// 1-bit add and sub, both instantiated from ADD_SUB_i1.
395 defm ADD_i1 : ADD_SUB_i1<add>;
396 defm SUB_i1 : ADD_SUB_i1<sub>;
// 16/32/64-bit integer add and sub; I3 appends the bit width to the stem, so
// these print as add.s16/add.s32/add.s64 and sub.s16/sub.s32/sub.s64.
399 defm ADD : I3<"add.s", add>;
400 defm SUB : I3<"sub.s", sub>;
// 32-bit add/sub matching the addc/subc nodes, printed with the ".cc" suffix.
402 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
403 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// 32-bit add/sub matching the adde/sube nodes, printed as addc.cc / subc.cc.
405 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
406 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
408 //mul.wide PTX instruction
409 def SInt32Const : PatLeaf<(imm), [{
410 const APInt &v = N->getAPIntValue();
411 if (v.isSignedIntN(32))
416 def UInt32Const : PatLeaf<(imm), [{
417 const APInt &v = N->getAPIntValue();
423 def SInt16Const : PatLeaf<(imm), [{
424 const APInt &v = N->getAPIntValue();
425 if (v.isSignedIntN(16))
430 def UInt16Const : PatLeaf<(imm), [{
431 const APInt &v = N->getAPIntValue();
437 def Int5Const : PatLeaf<(imm), [{
438 const APInt &v = N->getAPIntValue();
439 // Check if 0 <= v < 32
440 // Only then the result from (x << v) will be i32
441 if (v.sge(0) && v.slt(32))
446 def Int4Const : PatLeaf<(imm), [{
447 const APInt &v = N->getAPIntValue();
448 // Check if 0 <= v < 16
449 // Only then the result from (x << v) will be i16
450 if (v.sge(0) && v.slt(16))
455 def SHL2MUL32 : SDNodeXForm<imm, [{
456 const APInt &v = N->getAPIntValue();
458 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
461 def SHL2MUL16 : SDNodeXForm<imm, [{
462 const APInt &v = N->getAPIntValue();
464 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
468 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
469 "mul.wide.s32 \t$dst, $a, $b;", []>;
471 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
472 "mul.wide.s32 \t$dst, $a, $b;", []>;
474 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
475 "mul.wide.s32 \t$dst, $a, $b;", []>;
478 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
479 "mul.wide.u32 \t$dst, $a, $b;", []>;
481 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
482 "mul.wide.u32 \t$dst, $a, $b;", []>;
484 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
485 "mul.wide.u32 \t$dst, $a, $b;", []>;
488 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
489 "mul.wide.s16 \t$dst, $a, $b;", []>;
491 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
492 "mul.wide.s16 \t$dst, $a, $b;", []>;
494 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
495 "mul.wide.s16 \t$dst, $a, $b;", []>;
498 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
499 "mul.wide.u16 \t$dst, $a, $b;", []>;
501 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
502 "mul.wide.u16 \t$dst, $a, $b;", []>;
504 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
505 "mul.wide.u16 \t$dst, $a, $b;", []>;
// Shift-left of a sign- or zero-extended value by a small constant is selected
// as a mul.wide with an immediate. The shift amount must satisfy Int5Const
// (0 <= v < 32) for 32-bit sources or Int4Const (0 <= v < 16) for 16-bit
// sources, and is rewritten by SHL2MUL32 / SHL2MUL16 into the multiplier
// operand. All four patterns are gated on doMulWide.
507 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
508 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
509 Requires<[doMulWide]>;
510 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
511 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
512 Requires<[doMulWide]>;
// 16-bit source, 32-bit result.
514 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
515 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
516 Requires<[doMulWide]>;
517 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
518 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
519 Requires<[doMulWide]>;
// A multiply whose operands are both extensions of a narrower value (or one
// extension and a constant accepted by the matching *Const leaf) is selected
// as a single mul.wide. Signed patterns pair sext with SInt32Const/SInt16Const;
// unsigned patterns pair zext with UInt32Const/UInt16Const. All are gated on
// doMulWide.
// 32 x 32 -> 64, signed.
521 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
522 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
523 Requires<[doMulWide]>;
524 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
525 (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
526 Requires<[doMulWide]>;
// 32 x 32 -> 64, unsigned.
528 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
529 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
530 Requires<[doMulWide]>;
531 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
532 (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
533 Requires<[doMulWide]>;
// 16 x 16 -> 32, signed.
535 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
536 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
537 Requires<[doMulWide]>;
538 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
539 (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
540 Requires<[doMulWide]>;
// 16 x 16 -> 32, unsigned.
542 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
543 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
544 Requires<[doMulWide]>;
545 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
546 (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
547 Requires<[doMulWide]>;
551 : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
553 : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
// Target-specific DAG node for NVPTXISD::MUL_WIDE_UNSIGNED, typed by the
// SDTMulWide profile.
554 def mul_wide_unsigned
555 : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
// Selection of the explicit mul_wide_signed / mul_wide_unsigned nodes onto the
// MULWIDE* instructions, in register/register and register/immediate forms.
// All patterns are gated on doMulWide.
// 16-bit operands, i32 result.
557 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
558 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
559 Requires<[doMulWide]>;
560 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
561 (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
562 Requires<[doMulWide]>;
563 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
564 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
565 Requires<[doMulWide]>;
566 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
567 (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
568 Requires<[doMulWide]>;
// 32-bit operands, i64 result.
571 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
572 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
573 Requires<[doMulWide]>;
574 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
575 (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
576 Requires<[doMulWide]>;
577 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
578 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
579 Requires<[doMulWide]>;
580 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
581 (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
582 Requires<[doMulWide]>;
// Integer multiply, high-half multiply, divide and remainder, all instantiated
// from I3 (16/32/64-bit, register and immediate forms). I3 appends the bit
// width to the stem, e.g. "mul.lo.s" becomes mul.lo.s32.
584 defm MULT : I3<"mul.lo.s", mul>;
// High half of the product, signed (mulhs) and unsigned (mulhu).
586 defm MULTHS : I3<"mul.hi.s", mulhs>;
587 defm MULTHU : I3<"mul.hi.u", mulhu>;
// Signed and unsigned division.
589 defm SDIV : I3<"div.s", sdiv>;
590 defm UDIV : I3<"div.u", udiv>;
// Signed and unsigned remainder.
592 defm SREM : I3<"rem.s", srem>;
593 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
594 defm UREM : I3<"rem.u", urem>;
595 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
598 : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
599 SDTCisInt<2>, SDTCisSameAs<0, 2>,
600 SDTCisSameAs<0, 3>]>;
602 : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
// 16-bit integer multiply-add matching the imad node; all four records print
// "mad.lo.s16 $dst, $a, $b, $c". The suffix names the operand kinds of
// ($a, $b, $c): r = register, i = immediate. $a is always a register.
604 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
605 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
606 "mad.lo.s16 \t$dst, $a, $b, $c;",
607 [(set Int16Regs:$dst,
608 (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
609 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
610 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
611 "mad.lo.s16 \t$dst, $a, $b, $c;",
612 [(set Int16Regs:$dst,
613 (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
614 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
615 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
616 "mad.lo.s16 \t$dst, $a, $b, $c;",
617 [(set Int16Regs:$dst,
618 (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
619 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
620 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
621 "mad.lo.s16 \t$dst, $a, $b, $c;",
622 [(set Int16Regs:$dst,
623 (imad Int16Regs:$a, imm:$b, imm:$c))]>;
// 32-bit integer multiply-add matching the imad node; all four records print
// "mad.lo.s32 $dst, $a, $b, $c". The suffix names the operand kinds of
// ($a, $b, $c): r = register, i = immediate. $a is always a register.
625 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
626 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
627 "mad.lo.s32 \t$dst, $a, $b, $c;",
628 [(set Int32Regs:$dst,
629 (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
630 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
631 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
632 "mad.lo.s32 \t$dst, $a, $b, $c;",
633 [(set Int32Regs:$dst,
634 (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
635 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
636 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
637 "mad.lo.s32 \t$dst, $a, $b, $c;",
638 [(set Int32Regs:$dst,
639 (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
640 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
641 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
642 "mad.lo.s32 \t$dst, $a, $b, $c;",
643 [(set Int32Regs:$dst,
644 (imad Int32Regs:$a, imm:$b, imm:$c))]>;
// 64-bit integer multiply-add matching the imad node; all four records print
// "mad.lo.s64 $dst, $a, $b, $c". The suffix names the operand kinds of
// ($a, $b, $c): r = register, i = immediate. $a is always a register.
646 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
647 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
648 "mad.lo.s64 \t$dst, $a, $b, $c;",
649 [(set Int64Regs:$dst,
650 (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
651 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
652 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
653 "mad.lo.s64 \t$dst, $a, $b, $c;",
654 [(set Int64Regs:$dst,
655 (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
656 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
657 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
658 "mad.lo.s64 \t$dst, $a, $b, $c;",
659 [(set Int64Regs:$dst,
660 (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
661 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
662 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
663 "mad.lo.s64 \t$dst, $a, $b, $c;",
664 [(set Int64Regs:$dst,
665 (imad Int64Regs:$a, imm:$b, imm:$c))]>;
// Integer negation for 16-, 32- and 64-bit registers; each record matches the
// ineg fragment and prints neg.s<width>.
667 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
668 "neg.s16 \t$dst, $src;",
669 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
670 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
671 "neg.s32 \t$dst, $src;",
672 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
673 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
674 "neg.s64 \t$dst, $src;",
675 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
677 //-----------------------------------
678 // Floating Point Arithmetic
679 //-----------------------------------
682 def FloatConst1 : PatLeaf<(fpimm), [{
683 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
685 float f = (float)N->getValueAPF().convertToFloat();
688 // Constant (double)1.0
689 def DoubleConst1 : PatLeaf<(fpimm), [{
690 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
692 double d = (double)N->getValueAPF().convertToDouble();
// add/sub/mul from F3: no rounding qualifier in the mnemonic, and every
// record is gated on allowFMA (allowFMA_ftz for the .ftz forms).
696 defm FADD : F3<"add", fadd>;
697 defm FSUB : F3<"sub", fsub>;
698 defm FMUL : F3<"mul", fmul>;
// The same three operations from F3_rn: the mnemonic carries ".rn", and only
// the .ftz forms are predicated (on doF32FTZ).
700 defm FADD_rn : F3_rn<"add", fadd>;
701 defm FSUB_rn : F3_rn<"sub", fsub>;
702 defm FMUL_rn : F3_rn<"mul", fmul>;
// Unary operations from F2. FSQRT passes "sqrt.rn" as the stem, so it prints
// as sqrt.rn.f64 / sqrt.rn.ftz.f32 / sqrt.rn.f32.
704 defm FABS : F2<"abs", fabs>;
705 defm FNEG : F2<"neg", fneg>;
706 defm FSQRT : F2<"sqrt.rn", fsqrt>;
// f64 division.
// FDIV641r: a dividend matching DoubleConst1 is selected as a reciprocal; the
// assembly string prints only $dst and $b, so the $a operand is not emitted.
711 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
712 (ins f64imm:$a, Float64Regs:$b),
713 "rcp.rn.f64 \t$dst, $b;",
714 [(set Float64Regs:$dst,
715 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General register/register and register/immediate forms.
716 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
717 (ins Float64Regs:$a, Float64Regs:$b),
718 "div.rn.f64 \t$dst, $a, $b;",
719 [(set Float64Regs:$dst,
720 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
721 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
722 (ins Float64Regs:$a, f64imm:$b),
723 "div.rn.f64 \t$dst, $a, $b;",
724 [(set Float64Regs:$dst,
725 (fdiv Float64Regs:$a, fpimm:$b))]>;
728 // F32 Approximate reciprocal
// Selected when the dividend matches FloatConst1 and getDivF32Level()==0
// (do_DIVF32_APPROX). The _ftz record additionally requires useF32FTZ() and
// prints the ".ftz" qualifier. Only $dst and $b appear in the assembly.
730 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
731 (ins f32imm:$a, Float32Regs:$b),
732 "rcp.approx.ftz.f32 \t$dst, $b;",
733 [(set Float32Regs:$dst,
734 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
735 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
736 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
737 (ins f32imm:$a, Float32Regs:$b),
738 "rcp.approx.f32 \t$dst, $b;",
739 [(set Float32Regs:$dst,
740 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
741 Requires<[do_DIVF32_APPROX]>;
743 // F32 Approximate division
// General fdiv forms used when getDivF32Level()==0 (do_DIVF32_APPROX); they
// print div.approx. The _ftz records also require useF32FTZ() and print the
// ".ftz" qualifier.
745 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
746 (ins Float32Regs:$a, Float32Regs:$b),
747 "div.approx.ftz.f32 \t$dst, $a, $b;",
748 [(set Float32Regs:$dst,
749 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
750 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
751 def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
752 (ins Float32Regs:$a, f32imm:$b),
753 "div.approx.ftz.f32 \t$dst, $a, $b;",
754 [(set Float32Regs:$dst,
755 (fdiv Float32Regs:$a, fpimm:$b))]>,
756 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
757 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
758 (ins Float32Regs:$a, Float32Regs:$b),
759 "div.approx.f32 \t$dst, $a, $b;",
760 [(set Float32Regs:$dst,
761 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
762 Requires<[do_DIVF32_APPROX]>;
763 def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
764 (ins Float32Regs:$a, f32imm:$b),
765 "div.approx.f32 \t$dst, $a, $b;",
766 [(set Float32Regs:$dst,
767 (fdiv Float32Regs:$a, fpimm:$b))]>,
768 Requires<[do_DIVF32_APPROX]>;
770 // F32 Semi-accurate reciprocal
772 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// Hence these records print rcp.approx even though they are gated on
// getDivF32Level()==1 (do_DIVF32_FULL). The _ftz record additionally requires
// useF32FTZ().
774 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
775 (ins f32imm:$a, Float32Regs:$b),
776 "rcp.approx.ftz.f32 \t$dst, $b;",
777 [(set Float32Regs:$dst,
778 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
779 Requires<[do_DIVF32_FULL, doF32FTZ]>;
780 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
781 (ins f32imm:$a, Float32Regs:$b),
782 "rcp.approx.f32 \t$dst, $b;",
783 [(set Float32Regs:$dst,
784 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
785 Requires<[do_DIVF32_FULL]>;
787 // F32 Semi-accurate division
// General fdiv forms used when getDivF32Level()==1 (do_DIVF32_FULL); they
// print div.full. The _ftz records also require useF32FTZ() and print the
// ".ftz" qualifier.
789 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
790 (ins Float32Regs:$a, Float32Regs:$b),
791 "div.full.ftz.f32 \t$dst, $a, $b;",
792 [(set Float32Regs:$dst,
793 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
794 Requires<[do_DIVF32_FULL, doF32FTZ]>;
795 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
796 (ins Float32Regs:$a, f32imm:$b),
797 "div.full.ftz.f32 \t$dst, $a, $b;",
798 [(set Float32Regs:$dst,
799 (fdiv Float32Regs:$a, fpimm:$b))]>,
800 Requires<[do_DIVF32_FULL, doF32FTZ]>;
801 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
802 (ins Float32Regs:$a, Float32Regs:$b),
803 "div.full.f32 \t$dst, $a, $b;",
804 [(set Float32Regs:$dst,
805 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
806 Requires<[do_DIVF32_FULL]>;
807 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
808 (ins Float32Regs:$a, f32imm:$b),
809 "div.full.f32 \t$dst, $a, $b;",
810 [(set Float32Regs:$dst,
811 (fdiv Float32Regs:$a, fpimm:$b))]>,
812 Requires<[do_DIVF32_FULL]>;
814 // F32 Accurate reciprocal
// Reciprocal printed as rcp.rn. These records are not gated on the division
// level, only on Subtarget.reqPTX20() (plus useF32FTZ() for the _ftz form).
816 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
817 (ins f32imm:$a, Float32Regs:$b),
818 "rcp.rn.ftz.f32 \t$dst, $b;",
819 [(set Float32Regs:$dst,
820 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
821 Requires<[reqPTX20, doF32FTZ]>;
822 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
823 (ins f32imm:$a, Float32Regs:$b),
824 "rcp.rn.f32 \t$dst, $b;",
825 [(set Float32Regs:$dst,
826 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
827 Requires<[reqPTX20]>;
829 // F32 Accurate division
// General fdiv forms printed as div.rn. These records are not gated on the
// division level, only on Subtarget.reqPTX20() (plus useF32FTZ() for the
// _ftz forms).
831 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
832 (ins Float32Regs:$a, Float32Regs:$b),
833 "div.rn.ftz.f32 \t$dst, $a, $b;",
834 [(set Float32Regs:$dst,
835 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
836 Requires<[doF32FTZ, reqPTX20]>;
837 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
838 (ins Float32Regs:$a, f32imm:$b),
839 "div.rn.ftz.f32 \t$dst, $a, $b;",
840 [(set Float32Regs:$dst,
841 (fdiv Float32Regs:$a, fpimm:$b))]>,
842 Requires<[doF32FTZ, reqPTX20]>;
843 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
844 (ins Float32Regs:$a, Float32Regs:$b),
845 "div.rn.f32 \t$dst, $a, $b;",
846 [(set Float32Regs:$dst,
847 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
848 Requires<[reqPTX20]>;
849 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
850 (ins Float32Regs:$a, f32imm:$b),
851 "div.rn.f32 \t$dst, $a, $b;",
852 [(set Float32Regs:$dst,
853 (fdiv Float32Regs:$a, fpimm:$b))]>,
854 Requires<[reqPTX20]>;
// Approximate f32 reciprocal square root. The instruction has no pattern of
// its own; it is reached only through the Pat below.
860 def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
861 "rsqrt.approx.f32 \t$dst, $b;", []>;
// 1.0 / int_nvvm_sqrt_f(b) is folded into a single rsqrt.approx, but only when
// getDivF32Level()==1, usePrecSqrtF32() is false, and useF32FTZ() is false.
863 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
864 (RSQRTF32approx1r Float32Regs:$b)>,
865 Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
// FPCONTRACT32 - f32 three-operand instructions matching (fma a, b, c).
// OpcStr is the full mnemonic.  The four defs differ only in whether $b and
// $c are registers (r) or floating-point immediates (i); $a is always a
// register.
// NOTE(review): the Pred parameter is not referenced on any line visible
// here, and the trailing line of each def plus the closing brace are absent
// from this copy of the file.
867 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
868 def rrr : NVPTXInst<(outs Float32Regs:$dst),
869 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
870 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
871 [(set Float32Regs:$dst,
872 (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
874 def rri : NVPTXInst<(outs Float32Regs:$dst),
875 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
876 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
877 [(set Float32Regs:$dst,
878 (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
880 def rir : NVPTXInst<(outs Float32Regs:$dst),
881 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
882 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
883 [(set Float32Regs:$dst,
884 (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
886 def rii : NVPTXInst<(outs Float32Regs:$dst),
887 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
888 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
889 [(set Float32Regs:$dst,
890 (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
// FPCONTRACT64 - f64 counterpart of FPCONTRACT32: three-operand instructions
// matching (fma a, b, c) on Float64Regs, with $b/$c as register (r) or
// f64 immediate (i).
// NOTE(review): the Pred parameter is not referenced on any line visible
// here, and the trailing line of each def plus the closing brace are absent
// from this copy of the file.
894 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
895 def rrr : NVPTXInst<(outs Float64Regs:$dst),
896 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
897 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
898 [(set Float64Regs:$dst,
899 (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
901 def rri : NVPTXInst<(outs Float64Regs:$dst),
902 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
903 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
904 [(set Float64Regs:$dst,
905 (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
907 def rir : NVPTXInst<(outs Float64Regs:$dst),
908 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
909 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
910 [(set Float64Regs:$dst,
911 (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
913 def rii : NVPTXInst<(outs Float64Regs:$dst),
914 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
915 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
916 [(set Float64Regs:$dst,
917 (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
// Instantiate the fma.rn instruction families: f32 with .ftz, f32 without,
// and f64.
// NOTE(review): FMA64 is an f64 operation but is parameterized with the
// f32 flush-to-zero predicate doNoF32FTZ.  If Pred gates selection (its use
// is not visible here), f64 fma would be unselectable whenever f32 FTZ is
// enabled - confirm this is intended.
921 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
922 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
923 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>;
// One-operand f32 instruction printed as "<OpcStr>.approx.f32" and selected
// for the unary DAG node OpNode.
class NVPTX_F32_APPROX_UNARY<string OpcStr, SDNode OpNode>
  : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
              !strconcat(OpcStr, ".approx.f32 \t$dst, $src;"),
              [(set Float32Regs:$dst, (OpNode Float32Regs:$src))]>;

// fsin -> sin.approx.f32, fcos -> cos.approx.f32.
def SINF : NVPTX_F32_APPROX_UNARY<"sin", fsin>;
def COSF : NVPTX_F32_APPROX_UNARY<"cos", fcos>;
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()".
//
// frem yields a remainder with the sign of the dividend, so the quotient has
// to be rounded toward zero (CvtRZI) rather than toward minus infinity
// (CvtRMI).  With floor, frem(-5.0, 3.0) would evaluate to
// -5 - floor(-1.67) * 3 = 1.0 instead of the correct -2.0.

// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
             Float32Regs:$y))>,
          Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
             fpimm:$y))>,
          Requires<[doF32FTZ]>;

// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
             Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
             Float64Regs:$y))>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;
967 //-----------------------------------
968 // Logical Arithmetic
969 //-----------------------------------
// LOG_FORMAT - two-operand bitwise instructions for predicate (.pred),
// 16-, 32- and 64-bit registers, each in register/register (rr) and
// register/immediate (ri) form.  OpcStr is the mnemonic prefix and OpNode
// the DAG node being matched.
// NOTE(review): the continuation line of each *rr pattern and the closing
// brace are absent from this copy of the file.
971 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
972 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
973 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
974 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
975 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
976 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
977 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
978 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
979 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
980 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
982 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
983 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
984 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
985 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
986 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
987 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
989 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
990 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
991 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
992 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
993 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
994 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
996 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
997 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
998 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Instantiations: ORb1rr ... XORb64ri.
1001 defm OR : LOG_FORMAT<"or", or>;
1002 defm AND : LOG_FORMAT<"and", and>;
1003 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise complement of one register of class RC, printed as
// "not.<TypeStr>" and selected for the `not` fragment.
class NVPTX_NOT<string TypeStr, RegisterClass RC>
  : NVPTXInst<(outs RC:$dst), (ins RC:$src),
              !strconcat("not.", TypeStr, " \t$dst, $src;"),
              [(set RC:$dst, (not RC:$src))]>;

def NOT1  : NVPTX_NOT<"pred", Int1Regs>;
def NOT16 : NVPTX_NOT<"b16", Int16Regs>;
def NOT32 : NVPTX_NOT<"b32", Int32Regs>;
def NOT64 : NVPTX_NOT<"b64", Int64Regs>;
1018 // For shifts, the second src operand must be 32-bit value
// LSHIFT_FORMAT - shift instruction forms for 64-, 32- and 16-bit values.
// OpcStr is the mnemonic up to (not including) the bit width, which each
// def appends.  Immediate shift amounts are i32imm in every form; i32ii
// takes both the value and the amount as immediates.
// NOTE(review): the second line of each *rr operand list, the tail of every
// pattern and the closing brace are absent from this copy of the file.
1019 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1020 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1022 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1023 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1025 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1026 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1027 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1029 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1031 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1032 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1034 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1035 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1036 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1038 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1039 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1042 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1044 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1045 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1047 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1048 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1049 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// Left shift: shl.b64 / shl.b32 / shl.b16.
1053 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1055 // For shifts, the second src operand must be 32-bit value
1056 // Need to add cvt for the 8-bits.
// RSHIFT_FORMAT - right-shift instruction forms for 64-, 32- and 16-bit
// values; same operand layout as LSHIFT_FORMAT.  OpcStr is the mnemonic up
// to the bit width, which each def appends.
// NOTE(review): the second line of each *rr operand list, the tail of every
// pattern and the closing brace are absent from this copy of the file.
1057 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1058 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1060 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1061 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1063 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1064 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1065 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1067 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1069 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1070 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1072 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1073 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1074 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1076 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1077 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1078 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1080 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1082 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1083 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1085 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1086 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1087 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// sra uses the signed mnemonic (shr.s*), srl the unsigned one (shr.u*).
1091 defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
1092 defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1095 // Rotate: use ptx shf instruction if available.
1098 // 32 bit r2 = rotl r1, n
1100 // r2 = shf.l r1, r1, n
// 32-bit rotates emitted as a single shf.{l,r}.wrap.b32 with $src supplied
// as both data operands.  All four forms are guarded by hasHWROT32.
let Predicates = [hasHWROT32] in {
  // rotl by immediate:  r2 = shf.l r1, r1, n
  def ROTL32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                  (rotl Int32Regs:$src, (i32 imm:$amt)))]>;

  // rotl by register amount
  def ROTL32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                  (rotl Int32Regs:$src, Int32Regs:$amt))]>;

  // rotr by immediate:  r2 = shf.r r1, r1, n
  def ROTR32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                  (rotr Int32Regs:$src, (i32 imm:$amt)))]>;

  // rotr by register amount
  def ROTR32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                  (rotr Int32Regs:$src, Int32Regs:$amt))]>;
}
1129 // Rotate: if ptx shf instruction is not available, then use shift+add
// ROT32imm_sw - 32-bit rotate by immediate built from shifts: declares two
// scratch registers, shifts $src left by $amt1 and right by $amt2, and adds
// the two halves into $dst.  The callers choose $amt1/$amt2.
// NOTE(review): the final line of this def (pattern list and terminator) is
// absent from this copy of the file.
1132 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1133 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1134 !strconcat("{{\n\t",
1135 !strconcat(".reg .b32 %lhs;\n\t",
1136 !strconcat(".reg .b32 %rhs;\n\t",
1137 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1138 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1139 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1140 !strconcat("}}", ""))))))),
// SUB_FRM_32 - immediate transform: rewrites constant N as the i32 target
// constant (32 - N).
// NOTE(review): the closing line of this def is absent from this copy.
1143 def SUB_FRM_32 : SDNodeXForm<imm, [{
1144 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// Immediate 32-bit rotates when hardware rotate is unavailable (noHWROT32):
// both directions map onto ROT32imm_sw, with the left-shift amount first and
// the right-shift amount second.  SUB_FRM_32 supplies (32 - amt).
let Predicates = [noHWROT32] in {
  // rotl: shift left by amt, right by 32 - amt.
  def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;

  // rotr: shift left by 32 - amt, right by amt.
  def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
}
// ROTL32reg_sw - 32-bit rotate-left by a register amount without shf:
// computes %amt2 = 32 - $amt, then adds ($src shl $amt) and
// ($src shr %amt2).  Guarded by noHWROT32.
// NOTE(review): $amt is used unmasked; presumably callers guarantee it is
// within the register width - confirm.  The second line of the operand list
// is absent from this copy of the file.
1154 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1156 !strconcat("{{\n\t",
1157 !strconcat(".reg .b32 %lhs;\n\t",
1158 !strconcat(".reg .b32 %rhs;\n\t",
1159 !strconcat(".reg .b32 %amt2;\n\t",
1160 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1161 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1162 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1163 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1164 !strconcat("}}", ""))))))))),
1165 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1166 Requires<[noHWROT32]>;
// ROTR32reg_sw - 32-bit rotate-right by a register amount without shf:
// computes %amt2 = 32 - $amt, then adds ($src shr $amt) and
// ($src shl %amt2).  Guarded by noHWROT32.
// NOTE(review): $amt is used unmasked; presumably callers guarantee it is
// within the register width - confirm.  The second line of the operand list
// is absent from this copy of the file.
1168 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1170 !strconcat("{{\n\t",
1171 !strconcat(".reg .b32 %lhs;\n\t",
1172 !strconcat(".reg .b32 %rhs;\n\t",
1173 !strconcat(".reg .b32 %amt2;\n\t",
1174 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1175 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1176 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1177 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1178 !strconcat("}}", ""))))))))),
1179 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1180 Requires<[noHWROT32]>;
// ROT64imm_sw - 64-bit rotate by immediate built from shifts: shifts $src
// left by $amt1 and right by $amt2 into scratch registers and adds the two
// halves into $dst.  The callers choose $amt1/$amt2.
// NOTE(review): the final line of this def (pattern list and terminator) is
// absent from this copy of the file.
1183 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1184 i32imm:$amt1, i32imm:$amt2),
1185 !strconcat("{{\n\t",
1186 !strconcat(".reg .b64 %lhs;\n\t",
1187 !strconcat(".reg .b64 %rhs;\n\t",
1188 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1189 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1190 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1191 !strconcat("}}", ""))))))),
// SUB_FRM_64 - immediate transform: rewrites constant N as the i32 target
// constant (64 - N).
// NOTE(review): the closing line of this def is absent from this copy.
1194 def SUB_FRM_64 : SDNodeXForm<imm, [{
1195 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// Immediate 64-bit rotates: both directions map onto ROT64imm_sw with the
// left-shift amount first and the right-shift amount second; SUB_FRM_64
// supplies (64 - amt).  Unlike the 32-bit patterns, these carry no predicate.
1198 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1199 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
1200 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1201 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
// ROTL64reg_sw - 64-bit rotate-left by a 32-bit register amount: computes
// %amt2 = 64 - $amt, then adds ($src shl $amt) and ($src shr %amt2).
// NOTE(review): $amt is used unmasked; presumably callers guarantee it is
// within the register width - confirm.  The second line of the operand list
// is absent from this copy of the file.
1203 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1205 !strconcat("{{\n\t",
1206 !strconcat(".reg .b64 %lhs;\n\t",
1207 !strconcat(".reg .b64 %rhs;\n\t",
1208 !strconcat(".reg .u32 %amt2;\n\t",
1209 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1210 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1211 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1212 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1213 !strconcat("}}", ""))))))))),
1214 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
// ROTR64reg_sw - 64-bit rotate-right by a 32-bit register amount: computes
// %amt2 = 64 - $amt, then adds ($src shr $amt) and ($src shl %amt2).
// NOTE(review): $amt is used unmasked; presumably callers guarantee it is
// within the register width - confirm.  The second line of the operand list
// is absent from this copy of the file.
1216 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1218 !strconcat("{{\n\t",
1219 !strconcat(".reg .b64 %lhs;\n\t",
1220 !strconcat(".reg .b64 %rhs;\n\t",
1221 !strconcat(".reg .u32 %amt2;\n\t",
1222 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1223 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1224 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1225 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1226 !strconcat("}}", ""))))))))),
1227 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1229 // BFE - bit-field extract
// BFE - "bfe.<TyStr>" instruction forms on register class RC.  Three operand
// combinations for $b/$c are visible (register/register, register/immediate,
// immediate/immediate); none carries a selection pattern.
// NOTE(review): the `def <name>` line of each form and the closing brace are
// absent from this copy of the file.
1231 multiclass BFE<string TyStr, RegisterClass RC> {
1232 // BFE supports both 32-bit and 64-bit values, but the start and length
1233 // operands are always 32-bit
1235 : NVPTXInst<(outs RC:$d),
1236 (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
1237 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1239 : NVPTXInst<(outs RC:$d),
1240 (ins RC:$a, Int32Regs:$b, i32imm:$c),
1241 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1243 : NVPTXInst<(outs RC:$d),
1244 (ins RC:$a, i32imm:$b, i32imm:$c),
1245 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
// Signed and unsigned instantiations for 32- and 64-bit registers.
1248 defm BFE_S32 : BFE<"s32", Int32Regs>;
1249 defm BFE_U32 : BFE<"u32", Int32Regs>;
1250 defm BFE_S64 : BFE<"s64", Int64Regs>;
1251 defm BFE_U64 : BFE<"u64", Int64Regs>;
1253 //-----------------------------------
1254 // General Comparison
1255 //-----------------------------------
1257 // General setp instructions
// SETP - compare instructions writing a predicate (Int1Regs) result, in
// register/register (rr), register/immediate (ri) and immediate/register
// (ir) form.  The comparison is an explicit CmpMode operand, printed through
// its `base` and `ftz` modifiers; TypeStr is the PTX type suffix.
// NOTE(review): the final line of each def and the closing brace are absent
// from this copy of the file.
1258 multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1259 def rr : NVPTXInst<(outs Int1Regs:$dst),
1260 (ins RC:$a, RC:$b, CmpMode:$cmp),
1261 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1263 def ri : NVPTXInst<(outs Int1Regs:$dst),
1264 (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1265 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1267 def ir : NVPTXInst<(outs Int1Regs:$dst),
1268 (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1269 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
// One family per PTX type suffix (untyped bits, signed, unsigned, float).
1273 defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
1274 defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
1275 defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
1276 defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
1277 defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
1278 defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
1279 defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
1280 defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
1281 defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
1282 defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
1283 defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
1285 // General set instructions
// SET - compare instructions writing an Int32Regs result, in rr / ri / ir
// operand forms.  The comparison is an explicit CmpMode operand printed
// directly after "set"; none of the defs carries a selection pattern.
// NOTE(review): the closing brace of this multiclass is absent from this
// copy of the file.
1286 multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
1287 def rr : NVPTXInst<(outs Int32Regs:$dst),
1288 (ins RC:$a, RC:$b, CmpMode:$cmp),
1289 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1290 def ri : NVPTXInst<(outs Int32Regs:$dst),
1291 (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1292 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1293 def ir : NVPTXInst<(outs Int32Regs:$dst),
1294 (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1295 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
// One family per PTX type suffix (untyped bits, signed, unsigned, float).
1298 defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1299 defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1300 defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
1301 defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1302 defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1303 defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
1304 defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1305 defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1306 defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
1307 defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1308 defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1310 //-----------------------------------
1311 // General Selection
1312 //-----------------------------------
1314 // General selp instructions
// SELP - "selp.<TypeStr>" select instructions without selection patterns:
// $dst is chosen from $a / $b under predicate $p.  Forms cover every
// register/immediate combination of $a and $b (rr, ri, ir, ii).
// NOTE(review): the closing brace of this multiclass is absent from this
// copy of the file.
1315 multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1316 def rr : NVPTXInst<(outs RC:$dst),
1317 (ins RC:$a, RC:$b, Int1Regs:$p),
1318 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1319 def ri : NVPTXInst<(outs RC:$dst),
1320 (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1321 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1322 def ir : NVPTXInst<(outs RC:$dst),
1323 (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1324 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1325 def ii : NVPTXInst<(outs RC:$dst),
1326 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1327 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
// SELP_PATTERN - same four selp forms as SELP, but each one also matches
// (select $p, $a, $b); ImmNode is the DAG leaf used for immediate operands.
// NOTE(review): the second line of the parameter list (which declares
// ImmNode) and the closing brace are absent from this copy of the file.
1330 multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
1332 def rr : NVPTXInst<(outs RC:$dst),
1333 (ins RC:$a, RC:$b, Int1Regs:$p),
1334 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1335 [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
1336 def ri : NVPTXInst<(outs RC:$dst),
1337 (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1338 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1339 [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
1340 def ir : NVPTXInst<(outs RC:$dst),
1341 (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1342 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1343 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
1344 def ii : NVPTXInst<(outs RC:$dst),
1345 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1346 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1347 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
// selp families.  Only the .b* and .f* variants are instantiated with
// selection patterns (SELP_PATTERN); the .s* / .u* variants are pattern-less
// and are available for explicit use by other patterns.
1350 defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
1351 defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
1352 defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
1353 defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
1354 defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
1355 defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
1356 defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
1357 defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
1358 defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
1359 defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
1360 defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
1362 // Special select for predicate operands
// Select between two predicates is expanded into logic:
//   (p & a) | (!p & b)
1363 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1364 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1365 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1367 //-----------------------------------
1368 // Data Movement (Load / Store, Move)
1369 //-----------------------------------
// Addressing-mode definitions.
//  - ADDRri / ADDRri64: two-operand complex patterns rooted at frameindex,
//    matched by the C++ selectors SelectADDRri / SelectADDRri64.
//  - MEMri / MEMri64: (register, immediate) memory operands printed by
//    printMemOperand.
//  - imem / imemAny: pointer-typed operands printed by printOperand.
//  - LdStCode: i32 operand printed by printLdStCode.
// NOTE(review): the last line of each ComplexPattern and the closing brace
// of each Operand body are absent from this copy of the file.
1371 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1373 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
1376 def MEMri : Operand<i32> {
1377 let PrintMethod = "printMemOperand";
1378 let MIOperandInfo = (ops Int32Regs, i32imm);
1380 def MEMri64 : Operand<i64> {
1381 let PrintMethod = "printMemOperand";
1382 let MIOperandInfo = (ops Int64Regs, i64imm);
1385 def imem : Operand<iPTR> {
1386 let PrintMethod = "printOperand";
1389 def imemAny : Operand<iPTRAny> {
1390 let PrintMethod = "printOperand";
1393 def LdStCode : Operand<i32> {
1394 let PrintMethod = "printLdStCode";
// NVPTXISD::Wrapper: one pointer-typed result whose type equals that of its
// single operand.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Materialize a wrapped global address into a register of class RC with
// "mov.<TypeStr>".
class NVPTX_MOV_GLOBAL_ADDR<string TypeStr, RegisterClass RC>
  : NVPTXInst<(outs RC:$dst), (ins imem:$a),
              !strconcat("mov.", TypeStr, " \t$dst, $a;"),
              [(set RC:$dst, (Wrapper tglobaladdr:$a))]>;

def MOV_ADDR   : NVPTX_MOV_GLOBAL_ADDR<"u32", Int32Regs>;
def MOV_ADDR64 : NVPTX_MOV_GLOBAL_ADDR<"u64", Int64Regs>;
1408 // Get pointer to local stack
// Moves the address of the symbol "__local_depot<num>" into a 32-bit or
// 64-bit register; $num is an immediate appended to the symbol name.
// Neither form has a selection pattern.
// NOTE(review): the `def` line naming the 32-bit form is absent from this
// copy of the file.
1410 : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
1411 "mov.u32 \t$d, __local_depot$num;", []>;
1412 def MOV_DEPOT_ADDR_64
1413 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
1414 "mov.u64 \t$d, __local_depot$num;", []>;
1417 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves for every register class, all flagged with
// IsSimpleMove=1.  None has a selection pattern; per the comment above they
// are produced by copyPhysReg in NVPTXInstrInfo.cpp.
// NOTE(review): the closing brace of this `let` block is absent from this
// copy of the file.
1418 let IsSimpleMove=1 in {
1419 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1420 "mov.pred \t$dst, $sss;", []>;
1421 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1422 "mov.u16 \t$dst, $sss;", []>;
1423 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1424 "mov.u32 \t$dst, $sss;", []>;
1425 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1426 "mov.u64 \t$dst, $sss;", []>;
1428 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1429 "mov.f32 \t$dst, $src;", []>;
1430 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1431 "mov.f64 \t$dst, $src;", []>;
// Load an immediate into a register of class RC with "mov.<TypeStr>".
// ImmCls is the machine operand type and ImmNode the DAG leaf (imm / fpimm)
// that the instruction is selected for.
class NVPTX_MOV_IMM<string TypeStr, RegisterClass RC, Operand ImmCls,
                    SDNode ImmNode>
  : NVPTXInst<(outs RC:$dst), (ins ImmCls:$src),
              !strconcat("mov.", TypeStr, " \t$dst, $src;"),
              [(set RC:$dst, ImmNode:$src)]>;

// Integer immediates.
def IMOV1ri  : NVPTX_MOV_IMM<"pred", Int1Regs, i1imm, imm>;
def IMOV16ri : NVPTX_MOV_IMM<"u16", Int16Regs, i16imm, imm>;
def IMOV32ri : NVPTX_MOV_IMM<"u32", Int32Regs, i32imm, imm>;
def IMOV64i  : NVPTX_MOV_IMM<"u64", Int64Regs, i64imm, imm>;

// Floating-point immediates.
def FMOV32ri : NVPTX_MOV_IMM<"f32", Float32Regs, f32imm, fpimm>;
def FMOV64ri : NVPTX_MOV_IMM<"f64", Float64Regs, f64imm, fpimm>;
// A wrapped external symbol is materialized with an immediate move of the
// matching width.  The i64 form is needed for 64-bit pointers; without it
// (i64 (Wrapper texternalsym)) has no pattern and cannot be selected.
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
1455 //---- Copy Frame Index ----
// Compute a (register + immediate) address into a register.  The operand is
// printed through the `add` modifier of the memory-operand printer, and the
// instruction is selected for the ADDRri / ADDRri64 complex patterns.
1456 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1457 "add.u32 \t$dst, ${addr:add};",
1458 [(set Int32Regs:$dst, ADDRri:$addr)]>;
1459 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1460 "add.u64 \t$dst, ${addr:add};",
1461 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1463 //-----------------------------------
1464 // Comparison and Selection
1465 //-----------------------------------
// ISET_FORMAT - selection patterns for one integer comparison OpNode.
// For each operand width (16/32/64) and operand form (rr, ri, ir) it emits
//   - an i1-result pattern using the supplied setp_* instruction, and
//   - an i32-result pattern using the supplied set_* instruction,
// always passing Mode as the CmpMode operand.  The 18 Instruction parameters
// let the caller pick the signed or unsigned instruction family.
// NOTE(review): the closing brace of this multiclass is absent from this
// copy of the file.
1467 multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
1468 Instruction setp_16rr,
1469 Instruction setp_16ri,
1470 Instruction setp_16ir,
1471 Instruction setp_32rr,
1472 Instruction setp_32ri,
1473 Instruction setp_32ir,
1474 Instruction setp_64rr,
1475 Instruction setp_64ri,
1476 Instruction setp_64ir,
1477 Instruction set_16rr,
1478 Instruction set_16ri,
1479 Instruction set_16ir,
1480 Instruction set_32rr,
1481 Instruction set_32ri,
1482 Instruction set_32ir,
1483 Instruction set_64rr,
1484 Instruction set_64ri,
1485 Instruction set_64ir> {
1487 def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
1488 (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1489 def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
1490 (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
1491 def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
1492 (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
1494 def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
1495 (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1496 def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
1497 (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
1498 def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
1499 (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
1501 def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
1502 (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1503 def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
1504 (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
1505 def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
1506 (setp_64ir imm:$a, Int64Regs:$b, Mode)>;
1509 def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
1510 (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1511 def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
1512 (set_16ri Int16Regs:$a, imm:$b, Mode)>;
1513 def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
1514 (set_16ir imm:$a, Int16Regs:$b, Mode)>;
1516 def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
1517 (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1518 def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
1519 (set_32ri Int32Regs:$a, imm:$b, Mode)>;
1520 def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
1521 (set_32ir imm:$a, Int32Regs:$b, Mode)>;
1523 def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
1524 (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1525 def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
1526 (set_64ri Int64Regs:$a, imm:$b, Mode)>;
1527 def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
1528 (set_64ir imm:$a, Int64Regs:$b, Mode)>;
// ISET_FORMAT_SIGNED / ISET_FORMAT_UNSIGNED - ISET_FORMAT bound to the
// signed (SETP_s* / SET_s*) or unsigned (SETP_u* / SET_u*) instruction
// families.  The anonymous PatLeaf in each body is only a placeholder so
// that the multiclass is not empty.
// NOTE(review): the closing brace of each multiclass is absent from this
// copy of the file.
1531 multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
1532 : ISET_FORMAT<OpNode, Mode,
1533 SETP_s16rr, SETP_s16ri, SETP_s16ir,
1534 SETP_s32rr, SETP_s32ri, SETP_s32ir,
1535 SETP_s64rr, SETP_s64ri, SETP_s64ir,
1536 SET_s16rr, SET_s16ri, SET_s16ir,
1537 SET_s32rr, SET_s32ri, SET_s32ir,
1538 SET_s64rr, SET_s64ri, SET_s64ir> {
1539 // TableGen doesn't like empty multiclasses
1540 def : PatLeaf<(i32 0)>;
1543 multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
1544 : ISET_FORMAT<OpNode, Mode,
1545 SETP_u16rr, SETP_u16ri, SETP_u16ir,
1546 SETP_u32rr, SETP_u32ri, SETP_u32ir,
1547 SETP_u64rr, SETP_u64ri, SETP_u64ir,
1548 SET_u16rr, SET_u16ri, SET_u16ir,
1549 SET_u32rr, SET_u32ri, SET_u32ir,
1550 SET_u64rr, SET_u64ri, SET_u64ir> {
1551 // TableGen doesn't like empty multiclasses
1552 def : PatLeaf<(i32 0)>;
// Integer comparison patterns.  Each plain condition code (setgt, setlt, ...)
// is lowered with the signed instruction family and each setu* code with
// the unsigned family; both share the same CmpMode leaf.
1555 defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
1556 defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
1557 defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
1558 defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
1559 defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
1560 defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
1561 defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
1562 defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
1563 defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
1564 defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
1565 defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
1566 defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
// Comparisons of two predicates are expanded into predicate logic:
// "not equal" is a XOR, "equal" is the complement of that XOR.
1569 def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
1570 (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
1571 def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
1572 (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
1574 def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
1575 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1576 def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
1577 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
// i1 compare -> i32
// The XOR of the two predicates is true exactly when they differ; selp then
// picks its first immediate when that predicate is true, else the second.
//
// setne: differ -> -1, equal -> 0.
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
// seteq: differ -> 0, equal -> -1.  (This pattern previously matched setne
// a second time, which made it unreachable and left seteq without an i32
// lowering.)
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
// FSET_FORMAT - selection patterns for one floating-point comparison OpNode.
// Results are produced either as a predicate (SETP_f*) or as an i32 (SET_f*),
// for rr / ri / ir operand forms.  Every f32 pattern appears twice: first
// with ModeFTZ under doF32FTZ, then unconditionally with Mode.  The f64
// patterns always use Mode.
// NOTE(review): the closing brace of this multiclass is absent from this
// copy of the file.
1587 multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
1589 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1590 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1591 Requires<[doF32FTZ]>;
1592 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1593 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1594 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1595 (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1596 Requires<[doF32FTZ]>;
1597 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1598 (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1599 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1600 (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1601 Requires<[doF32FTZ]>;
1602 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1603 (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1606 def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
1607 (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1608 def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
1609 (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1610 def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
1611 (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
1614 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1615 (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1616 Requires<[doF32FTZ]>;
1617 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1618 (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1619 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1620 (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1621 Requires<[doF32FTZ]>;
1622 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1623 (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1624 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1625 (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1626 Requires<[doF32FTZ]>;
1627 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1628 (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1631 def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
1632 (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1633 def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
1634 (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1635 def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
1636 (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
// Floating-point comparison patterns, one family per condition code:
// ordered codes (seto*) use the plain Cmp* modes, unordered codes (setu*)
// use the Cmp*U modes, and seto / setuo use CmpNUM / CmpNAN.  Each entry
// supplies both the regular and the _FTZ mode leaf.
1639 defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
1640 defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
1641 defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
1642 defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
1643 defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
1644 defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
1646 defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
1647 defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
1648 defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
1649 defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
1650 defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
1651 defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
1653 defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
1654 defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
1656 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1657 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profiles for the NVPTX parameter / call / return-value DAG nodes.
// Each SDTypeProfile gives <number of results, number of operands,
// type constraints>; most constrain only the leading operands to integers.
// NOTE(review): the continuation line of SDTDeclareParamProfile is absent
// from this copy of the file.
1659 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1661 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1662 SDTCisInt<1>, SDTCisInt<2>]>;
1663 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1664 def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
1665 def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
1666 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1667 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1668 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1669 def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
1670 def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
1671 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1672 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1673 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1674 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1675 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1676 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
1677 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1678 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
1679 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
1680 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// SelectionDAG node definitions. Each SDNode<opcode, profile, properties>
// binds a TableGen name to an NVPTXISD opcode, using one of the type
// profiles declared earlier in this file.
//
// Parameter / return-value declaration nodes.
1682 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1683 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1684 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1685 SDTDeclareScalarParamProfile,
1686 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1687 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1688 SDTDeclareParamProfile,
1689 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1690 def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1691 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter loads: marked SDNPMayLoad rather than SDNPSideEffect.
1692 def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1693 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1694 def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
1695 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1696 def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
1697 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
// Nodes that print the head of a call statement (see the PrintCall*Inst
// definitions for the emitted text).
1698 def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1699 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1700 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1701 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Stores into outgoing call parameters.
1702 def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1703 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1704 def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
1705 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1706 def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
1707 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1708 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1709 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1710 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1711 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Nodes that print the argument list and callee of a call statement.
// Note that CallArgEnd, CallVoid and Prototype all share SDTCallVoidProfile.
1712 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1713 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1714 def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1715 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1716 def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1717 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1718 def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1719 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1720 def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1721 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1722 def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1723 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1724 def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1725 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1726 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
1728 def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1729 [SDNPHasChain, SDNPSideEffect]>;
1730 def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
1731 [SDNPHasChain, SDNPSideEffect]>;
1732 def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
1733 [SDNPHasChain, SDNPSideEffect]>;
1734 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1735 SDTPseudoUseParamProfile,
1736 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// RETURN takes no operands and produces no results (SDTCallArgMarkProfile).
1737 def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1738 [SDNPHasChain, SDNPSideEffect]>;
// Instruction classes that read a call's return value back into registers.
// `opstr` is the PTX type suffix appended to the mnemonic (e.g. ".b32").
//
// Scalar load: prints `ld.param<opstr> $dst, [retval0+$b];`, where $b is an
// immediate printed as the offset from the `retval0` symbol.
1740 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
1741 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1742 !strconcat(!strconcat("ld.param", opstr),
1743 "\t$dst, [retval0+$b];"),
// Register form: prints `mov<opstr> $dst, retval$b;`, so here $b is printed
// as the numeric suffix of the `retval` symbol rather than as an offset.
// Selected for LoadParam nodes whose first operand is the constant 0.
1746 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
1747 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1748 !strconcat(!strconcat("mov", opstr),
1749 "\t$dst, retval$b;"),
1750 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// Two-element vector load from `retval0` at offset $b. No isel pattern ([]).
1752 class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
1753 NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
1754 !strconcat(!strconcat("ld.param.v2", opstr),
1755 "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
// Four-element vector load from `retval0` at offset $b. No isel pattern ([]).
1757 class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
1758 NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
1761 !strconcat(!strconcat("ld.param.v4", opstr),
1762 "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>;
// Instruction classes that store values into an outgoing call parameter.
// `opstr` is the PTX type suffix appended to the mnemonic. In the printed
// text $a becomes the numeric suffix of the `param` symbol and $b the offset.
//
// Scalar store: prints `st.param<opstr> [param$a+$b], $val;`.
1764 class StoreParamInst<NVPTXRegClass regclass, string opstr> :
1765 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1766 !strconcat(!strconcat("st.param", opstr),
1767 "\t[param$a+$b], $val;"),
// Two-element vector store: prints `st.param.v2<opstr>` with the register
// pair $val, $val2 as the brace-enclosed source list.
1770 class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
1771 NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
1772 i32imm:$a, i32imm:$b),
1773 !strconcat(!strconcat("st.param.v2", opstr),
1774 "\t[param$a+$b], {{$val, $val2}};"),
// Four-element vector store into an outgoing call parameter. Prints
// `st.param.v4<opstr>` with the registers $val, $val2, $val3, $val4 as the
// brace-enclosed source list; $a is printed as the numeric suffix of the
// `param` symbol and $b as the offset. `opstr` is the PTX type suffix.
//
// The input operands are named $val, $val2, $val3, $val4 so that every
// operand referenced by the assembly string is declared. The list used to
// declare $val, $val1, $val2, $val3, which left $val4 undefined and $val1
// unused.
1777 class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
1778 NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
1779 regclass:$val4, i32imm:$a, i32imm:$b),
1780 !strconcat(!strconcat("st.param.v4", opstr),
1781 "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
// Instruction classes that store a function's return value. All of them
// write to the `func_retval0` symbol at immediate offset $a; `opstr` is the
// PTX type suffix appended to the mnemonic.
//
// Scalar store: prints `st.param<opstr> [func_retval0+$a], $val;`.
1784 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
1785 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
1786 !strconcat(!strconcat("st.param", opstr),
1787 "\t[func_retval0+$a], $val;"),
// Two-element vector store ($val, $val2).
1790 class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
1791 NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
1792 !strconcat(!strconcat("st.param.v2", opstr),
1793 "\t[func_retval0+$a], {{$val, $val2}};"),
// Four-element vector store ($val, $val2, $val3, $val4).
1796 class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
1798 (ins regclass:$val, regclass:$val2, regclass:$val3,
1799 regclass:$val4, i32imm:$a),
1800 !strconcat(!strconcat("st.param.v4", opstr),
1801 "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
// Operand-less instructions that print the head of a PTX call statement.
// Each one matches a PrintCall node whose single operand is a constant i32;
// that constant equals the number of `retvalN` names listed in the printed
// text (0 prints a bare "call ").
1804 def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
1806 [(PrintCall (i32 1))]>;
1807 def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
1808 "call (retval0, retval1), ",
1809 [(PrintCall (i32 2))]>;
1810 def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
1811 "call (retval0, retval1, retval2), ",
1812 [(PrintCall (i32 3))]>;
1813 def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
1814 "call (retval0, retval1, retval2, retval3), ",
1815 [(PrintCall (i32 4))]>;
1816 def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
1817 "call (retval0, retval1, retval2, retval3, retval4), ",
1818 [(PrintCall (i32 5))]>;
1819 def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
1820 "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
1821 [(PrintCall (i32 6))]>;
1822 def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
1823 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1824 [(PrintCall (i32 7))]>;
1825 def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
1826 !strconcat("call (retval0, retval1, retval2, retval3, retval4",
1827 ", retval5, retval6, retval7), "),
1828 [(PrintCall (i32 8))]>;
1830 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
1831 [(PrintCall (i32 0))]>;
// Same set for PrintCallUni nodes; the printed mnemonic is `call.uni`.
1833 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
1834 "call.uni (retval0), ",
1835 [(PrintCallUni (i32 1))]>;
1836 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
1837 "call.uni (retval0, retval1), ",
1838 [(PrintCallUni (i32 2))]>;
1839 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
1840 "call.uni (retval0, retval1, retval2), ",
1841 [(PrintCallUni (i32 3))]>;
1842 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
1843 "call.uni (retval0, retval1, retval2, retval3), ",
1844 [(PrintCallUni (i32 4))]>;
1845 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
1846 "call.uni (retval0, retval1, retval2, retval3, retval4), ",
1847 [(PrintCallUni (i32 5))]>;
1848 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
1849 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
1850 [(PrintCallUni (i32 6))]>;
1851 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
1852 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1853 [(PrintCallUni (i32 7))]>;
1854 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
1855 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
1856 ", retval5, retval6, retval7), "),
1857 [(PrintCallUni (i32 8))]>;
1859 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
1860 [(PrintCallUni (i32 0))]>;
// Concrete ld.param instructions, one per (register class, PTX type suffix)
// pair. The 8-bit variants use Int16Regs with a ".b8" suffix. There are no
// V4 variants for 64-bit element types in this list.
1862 def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
1863 def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
1864 def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
1865 def LoadParamMemI8 : LoadParamMemInst<Int16Regs, ".b8">;
1866 def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
1867 def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
1868 def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
1869 def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
1870 def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
1871 def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
1872 def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
1873 def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
1874 def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
1875 def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
1876 def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
1877 def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
// Concrete integer st.param instructions (scalar and two-element vector),
// one per (register class, PTX type suffix) pair. The 8-bit variants use
// Int16Regs with a ".b8" suffix.
1879 def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
1880 def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
1882 def StoreParamI16 : StoreParamInst<Int16Regs, ".b16">;
1883 def StoreParamI8 : StoreParamInst<Int16Regs, ".b8">;
1884 def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
1885 def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
1886 def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
1887 def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">;
1889 // FIXME: StoreParamV4Inst crashes llvm-tblgen :(
1890 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
// Stores four 32-bit integer registers into an outgoing call parameter.
// $a is printed as the numeric suffix of the `param` symbol and $b as the
// offset. The mnemonic carries the `.v4` vector qualifier so that it agrees
// with the four-register source list and with the StoreParamV4I16,
// StoreParamV4I8 and StoreParamV4F32 definitions; it was previously printed
// as the scalar form `st.param.b32`.
1891 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
1892 Int32Regs:$val3, Int32Regs:$val4,
1893 i32imm:$a, i32imm:$b),
1894 "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
// Four-element vector stores of 16-bit and 8-bit integers into an outgoing
// call parameter. Both take their values in Int16Regs; only the printed
// type suffix (.b16 vs .b8) differs. Written out directly instead of using
// the StoreParamV4Inst class.
1897 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
1898 Int16Regs:$val3, Int16Regs:$val4,
1899 i32imm:$a, i32imm:$b),
1900 "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1903 def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
1904 Int16Regs:$val3, Int16Regs:$val4,
1905 i32imm:$a, i32imm:$b),
1906 "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
// Floating-point st.param instructions: scalar and two-element vector forms
// come from the shared classes; the four-element f32 form is written out
// directly (see the FIXME below).
1909 def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
1910 def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
1911 def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
1912 def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
1913 // FIXME: StoreParamV4Inst crashes llvm-tblgen :(
1914 //def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
1915 def StoreParamV4F32 : NVPTXInst<(outs),
1916 (ins Float32Regs:$val, Float32Regs:$val2,
1917 Float32Regs:$val3, Float32Regs:$val4,
1918 i32imm:$a, i32imm:$b),
1919 "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
// Concrete return-value store instructions, one per (register class, PTX
// type suffix) pair. The 8-bit variants use Int16Regs with a ".b8" suffix.
// There are no V4 variants for 64-bit element types in this list.
1923 def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
1924 def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
1925 def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
1926 def StoreRetvalI8 : StoreRetvalInst<Int16Regs, ".b8">;
1927 def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
1928 def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
1929 def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
1930 def StoreRetvalV2I8 : StoreRetvalV2Inst<Int16Regs, ".b8">;
1931 def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
1932 def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
1933 def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
1935 def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
1936 def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
1937 def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
1938 def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
1939 def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
// Punctuation of a printed call statement: "(" opens the argument list;
// CallArgEnd with constant 1 prints ");" and with constant 0 prints ")"
// (no terminating semicolon). RETURNInst prints "ret;".
1941 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
1942 def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
1943 def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
1944 def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// Instructions that print one entry of a call's argument list. The CallArg
// forms print the operand followed by ", "; the LastCallArg forms print the
// operand with no trailing separator. The first node operand is a constant
// selector: 0 for the register and immediate forms, 1 for the forms that
// print `param$a`.
1946 class CallArgInst<NVPTXRegClass regclass> :
1947 NVPTXInst<(outs), (ins regclass:$a), "$a, ",
1948 [(CallArg (i32 0), regclass:$a)]>;
1950 class LastCallArgInst<NVPTXRegClass regclass> :
1951 NVPTXInst<(outs), (ins regclass:$a), "$a",
1952 [(LastCallArg (i32 0), regclass:$a)]>;
// Register-operand instantiations.
1954 def CallArgI64 : CallArgInst<Int64Regs>;
1955 def CallArgI32 : CallArgInst<Int32Regs>;
1956 def CallArgI16 : CallArgInst<Int16Regs>;
1958 def CallArgF64 : CallArgInst<Float64Regs>;
1959 def CallArgF32 : CallArgInst<Float32Regs>;
1961 def LastCallArgI64 : LastCallArgInst<Int64Regs>;
1962 def LastCallArgI32 : LastCallArgInst<Int32Regs>;
1963 def LastCallArgI16 : LastCallArgInst<Int16Regs>;
1965 def LastCallArgF64 : LastCallArgInst<Float64Regs>;
1966 def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// 32-bit immediate argument, printed as the literal value.
1968 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
1969 [(CallArg (i32 0), (i32 imm:$a))]>;
1970 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
1971 [(LastCallArg (i32 0), (i32 imm:$a))]>;
// Argument passed by naming a declared parameter: $a is printed as the
// numeric suffix of `param`.
1973 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
1974 [(CallArg (i32 1), (i32 imm:$a))]>;
1975 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
1976 [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Instructions that print the callee of a call statement. CallVoidInst
// matches a wrapped global address; the Reg / Reg64 forms match a callee
// address held in a 32-bit or 64-bit integer register respectively.
1978 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
1980 [(CallVoid (Wrapper tglobaladdr:$addr))]>;
1981 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
1983 [(CallVoid Int32Regs:$addr)]>;
1984 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
1986 [(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_$val;", where $val is an immediate printed as the
// numeric suffix of the `prototype_` label.
1987 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
1988 ", prototype_$val;",
1989 [(Prototype (i32 imm:$val))]>;
// Instructions that print PTX declarations for call return values
// (`retval$num`) and call parameters (`param$a`). All operands are
// immediates that are substituted into the declaration text.
//
// Aligned byte-array return value: `.param .align A .b8 retvalN[SIZE];`.
1991 def DeclareRetMemInst : NVPTXInst<(outs),
1992 (ins i32imm:$align, i32imm:$size, i32imm:$num),
1993 ".param .align $align .b8 retval$num[$size];",
1994 [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// Scalar return value. The first DeclareRet operand is a constant selector:
// 1 prints a `.param` declaration, 2 prints a `.reg` declaration.
1995 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
1996 ".param .b$size retval$num;",
1997 [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
1998 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
1999 ".reg .b$size retval$num;",
2000 [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Aligned byte-array parameter: `.param .align A .b8 paramN[SIZE];`.
2002 def DeclareParamInst : NVPTXInst<(outs),
2003 (ins i32imm:$align, i32imm:$a, i32imm:$size),
2004 ".param .align $align .b8 param$a[$size];",
2005 [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// Scalar parameter. The last DeclareScalarParam operand is a constant
// selector: 0 prints a `.param` declaration, 1 prints a `.reg` declaration.
2006 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2007 ".param .b$size param$a;",
2008 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
2009 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2010 ".reg .b$size param$a;",
2011 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// Register-to-register copy selected for MoveParam nodes. Prints
// `mov<asmstr> $dst, $src;`, where `asmstr` is the PTX type suffix.
2013 class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
2014 NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
2015 !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
2016 [(set regclass:$dst, (MoveParam regclass:$src))]>;
2018 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
2019 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// NOTE(review): unlike the other MoveParam definitions this one prints
// `cvt.u16.u32` rather than a `mov`, although $src is declared as Int16Regs.
// Confirm that the source really holds a 32-bit value at this point.
2020 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
2021 "cvt.u16.u32\t$dst, $src;",
2022 [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
2023 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
2024 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Pseudo instruction selected for PseudoUseParam nodes. It takes one
// register operand and prints only an assembly comment naming that register
// ("// Pseudo use of $src"); no PTX operation is emitted by the asm string.
2026 class PseudoUseParamInst<NVPTXRegClass regclass> :
2027 NVPTXInst<(outs), (ins regclass:$src),
2028 "// Pseudo use of $src",
2029 [(PseudoUseParam regclass:$src)]>;
2031 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
2032 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
2033 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
2034 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
2035 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2039 // Load / Store Handling
// Scalar load instructions for one destination register class.
//
// Every variant takes four LdStCode modifier operands ($isVol, $addsp, $Vec,
// $Sign) that are printed through the `volatile`, `addsp`, `vec` and `sign`
// operand modifiers, followed by an immediate $fromWidth that is printed
// directly after the sign/type letter. The variants differ only in how the
// address is given:
//   _avar     imem:$addr
//   _areg     Int32Regs:$addr
//   _areg_64  Int64Regs:$addr
//   _ari      Int32Regs:$addr + i32imm:$offset
//   _ari_64   Int64Regs:$addr + i32imm:$offset
//   _asi      imem:$addr + i32imm:$offset
// All pattern lists are empty, so none of these is matched by a
// TableGen-generated isel pattern (presumably they are selected by custom
// C++ code -- TODO confirm).
2041 multiclass LD<NVPTXRegClass regclass> {
2042 def _avar : NVPTXInst<(outs regclass:$dst),
2043 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2044 i32imm:$fromWidth, imem:$addr),
2045 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2046 "$fromWidth \t$dst, [$addr];"), []>;
2047 def _areg : NVPTXInst<(outs regclass:$dst),
2048 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2049 i32imm:$fromWidth, Int32Regs:$addr),
2050 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2051 "$fromWidth \t$dst, [$addr];"), []>;
2052 def _areg_64 : NVPTXInst<(outs regclass:$dst),
2053 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2054 i32imm:$fromWidth, Int64Regs:$addr),
2055 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2056 " \t$dst, [$addr];"), []>;
2057 def _ari : NVPTXInst<(outs regclass:$dst),
2058 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2059 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2060 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2061 "$fromWidth \t$dst, [$addr+$offset];"), []>;
2062 def _ari_64 : NVPTXInst<(outs regclass:$dst),
2063 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2064 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2065 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2066 " \t$dst, [$addr+$offset];"), []>;
2067 def _asi : NVPTXInst<(outs regclass:$dst),
2068 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2069 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2070 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2071 "$fromWidth \t$dst, [$addr+$offset];"), []>;
// Instantiate LD per value type. All are flagged as loads without other
// side effects; i8 and i16 both use Int16Regs.
2074 let mayLoad=1, neverHasSideEffects=1 in {
2075 defm LD_i8 : LD<Int16Regs>;
2076 defm LD_i16 : LD<Int16Regs>;
2077 defm LD_i32 : LD<Int32Regs>;
2078 defm LD_i64 : LD<Int64Regs>;
2079 defm LD_f32 : LD<Float32Regs>;
2080 defm LD_f64 : LD<Float64Regs>;
// Scalar store instructions for one source register class.
//
// Every variant takes the value to store ($src), four LdStCode modifier
// operands ($isVol, $addsp, $Vec, $Sign) printed through the `volatile`,
// `addsp`, `vec` and `sign` operand modifiers, and an immediate $toWidth
// printed directly after the sign/type letter. The variants differ only in
// how the address is given:
//   _avar     imem:$addr
//   _areg     Int32Regs:$addr
//   _areg_64  Int64Regs:$addr
//   _ari      Int32Regs:$addr + i32imm:$offset
//   _ari_64   Int64Regs:$addr + i32imm:$offset
//   _asi      imem:$addr + i32imm:$offset
// All pattern lists are empty, so none of these is matched by a
// TableGen-generated isel pattern (presumably they are selected by custom
// C++ code -- TODO confirm).
2083 multiclass ST<NVPTXRegClass regclass> {
2084 def _avar : NVPTXInst<(outs),
2085 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2086 LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2087 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2088 " \t[$addr], $src;"), []>;
2089 def _areg : NVPTXInst<(outs),
2090 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2091 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2092 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2093 " \t[$addr], $src;"), []>;
2094 def _areg_64 : NVPTXInst<(outs),
2095 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2096 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2097 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2098 "\t[$addr], $src;"), []>;
2099 def _ari : NVPTXInst<(outs),
2100 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2101 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
2102 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2103 " \t[$addr+$offset], $src;"), []>;
2104 def _ari_64 : NVPTXInst<(outs),
2105 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2106 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
2107 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2108 "\t[$addr+$offset], $src;"), []>;
2109 def _asi : NVPTXInst<(outs),
2110 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2111 LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
2112 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2113 " \t[$addr+$offset], $src;"), []>;
// Instantiate ST per value type. All are flagged as stores without other
// side effects; i8 and i16 both use Int16Regs.
2116 let mayStore=1, neverHasSideEffects=1 in {
2117 defm ST_i8 : ST<Int16Regs>;
2118 defm ST_i16 : ST<Int16Regs>;
2119 defm ST_i32 : ST<Int32Regs>;
2120 defm ST_i64 : ST<Int64Regs>;
2121 defm ST_f32 : ST<Float32Regs>;
2122 defm ST_f64 : ST<Float64Regs>;
2125 // The following is used only in and after vector elementization.
2126 // Vector elementization happens at the machine instruction level, so the
2127 // following instructions
2128 // never appear in the DAG.
// Vector load instructions for one element register class. The _v2_*
// variants define two destination registers ($dst1, $dst2) and the _v4_*
// variants four ($dst1..$dst4); the destinations are printed as a
// brace-enclosed list. Modifier operands ($isVol, $addsp, $Vec, $Sign,
// $fromWidth) and the address-form suffixes (_avar, _areg, _areg_64, _ari,
// _ari_64, _asi) follow the same scheme as the scalar LD multiclass.
// All pattern lists are empty.
2129 multiclass LD_VEC<NVPTXRegClass regclass> {
2130 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2131 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2132 i32imm:$fromWidth, imem:$addr),
2133 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2134 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2135 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2136 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2137 i32imm:$fromWidth, Int32Regs:$addr),
2138 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2139 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2140 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2141 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2142 i32imm:$fromWidth, Int64Regs:$addr),
2143 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2144 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2145 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2146 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2147 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2148 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2149 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2150 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2151 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2152 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2153 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2154 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2155 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2156 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2157 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2158 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2159 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2160 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2161 regclass:$dst3, regclass:$dst4),
2162 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2163 i32imm:$fromWidth, imem:$addr),
2164 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2165 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2166 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2168 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2169 i32imm:$fromWidth, Int32Regs:$addr),
2170 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2171 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2172 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2173 regclass:$dst3, regclass:$dst4),
2174 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2175 i32imm:$fromWidth, Int64Regs:$addr),
2176 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2177 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2178 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2180 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2181 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2182 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2183 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2185 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2186 regclass:$dst3, regclass:$dst4),
2187 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2188 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2189 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2190 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2192 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2194 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2195 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2196 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2197 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
// Instantiate LD_VEC per element type. All are flagged as loads without
// other side effects; i8 and i16 both use Int16Regs.
2200 let mayLoad=1, neverHasSideEffects=1 in {
2201 defm LDV_i8 : LD_VEC<Int16Regs>;
2202 defm LDV_i16 : LD_VEC<Int16Regs>;
2203 defm LDV_i32 : LD_VEC<Int32Regs>;
2204 defm LDV_i64 : LD_VEC<Int64Regs>;
2205 defm LDV_f32 : LD_VEC<Float32Regs>;
2206 defm LDV_f64 : LD_VEC<Float64Regs>;
// Vector store instructions for one element register class. The _v2_*
// variants take two source registers ($src1, $src2) and the _v4_* variants
// four ($src1..$src4); the sources are printed as a brace-enclosed list
// after the address. Modifier operands and address-form suffixes follow the
// same scheme as the scalar ST multiclass, except that the width immediate
// is named $fromWidth here (it is $toWidth in ST). All pattern lists are
// empty.
2209 multiclass ST_VEC<NVPTXRegClass regclass> {
2210 def _v2_avar : NVPTXInst<(outs),
2211 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2212 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2213 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2214 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2215 def _v2_areg : NVPTXInst<(outs),
2216 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2217 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2218 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2219 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2220 def _v2_areg_64 : NVPTXInst<(outs),
2221 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2222 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2223 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2224 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2225 def _v2_ari : NVPTXInst<(outs),
2226 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2227 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2229 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2230 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2231 def _v2_ari_64 : NVPTXInst<(outs),
2232 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2233 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2235 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2236 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2237 def _v2_asi : NVPTXInst<(outs),
2238 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2239 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2241 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2242 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2243 def _v4_avar : NVPTXInst<(outs),
2244 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2245 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2246 i32imm:$fromWidth, imem:$addr),
2247 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2248 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2249 def _v4_areg : NVPTXInst<(outs),
2250 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2251 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2252 i32imm:$fromWidth, Int32Regs:$addr),
2253 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2254 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2255 def _v4_areg_64 : NVPTXInst<(outs),
2256 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2257 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2258 i32imm:$fromWidth, Int64Regs:$addr),
2259 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2260 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2261 def _v4_ari : NVPTXInst<(outs),
2262 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2263 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2264 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2265 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2266 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2268 def _v4_ari_64 : NVPTXInst<(outs),
2269 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2270 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2271 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2272 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2273 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2275 def _v4_asi : NVPTXInst<(outs),
2276 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2277 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2278 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2279 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2280 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
// Instantiate ST_VEC per element type. All are flagged as stores without
// other side effects; i8 and i16 both use Int16Regs.
2283 let mayStore=1, neverHasSideEffects=1 in {
2284 defm STV_i8 : ST_VEC<Int16Regs>;
2285 defm STV_i16 : ST_VEC<Int16Regs>;
2286 defm STV_i32 : ST_VEC<Int32Regs>;
2287 defm STV_i64 : ST_VEC<Int64Regs>;
2288 defm STV_f32 : ST_VEC<Float32Regs>;
2289 defm STV_f64 : ST_VEC<Float64Regs>;
2293 //---- Conversion ----
// Bit-preserving move between an integer and a floating-point register
// class of the same width. Prints `mov.b<SzStr> $d, $a;` and is selected for
// `bitconvert` nodes from regclassIn to regclassOut. SzStr is the width in
// bits, given as a string ("32" or "64" in the definitions below).
2295 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2296 NVPTXRegClass regclassOut> :
2297 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2298 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2299 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2301 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2302 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2303 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2304 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2306 // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
2307 // we cannot specify floating-point literals in isel patterns. Therefore, we
2308 // use an integer selp to select either 1 or 0 and then cvt to floating-point.
// Integer -> floating-point conversion patterns. Every conversion uses the
// CvtRN mode. An i1 source is first turned into a 32-bit 1/0 value with
// SELP_u32ii and then converted with the 32-bit CVT instruction.
//
// NOTE(review): for sint_to_fp from i1 the selected integer is 1 (not -1)
// when the predicate is true -- confirm that this matches the intended
// signed interpretation of i1.
//
// signed integer -> f32
2311 def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
2312 (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2313 def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
2314 (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
2315 def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
2316 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
2317 def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
2318 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
// unsigned integer -> f32
2321 def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
2322 (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2323 def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
2324 (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
2325 def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
2326 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
2327 def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
2328 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
// signed integer -> f64
2331 def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
2332 (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2333 def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
2334 (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
2335 def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
2336 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
2337 def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
2338 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
// unsigned integer -> f64
2341 def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
2342 (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2343 def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
2344 (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
2345 def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
2346 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
2347 def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
2348 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
// f32 -> signed integer.
// For i16/i32/i64 results each type has two patterns: one guarded by
// Requires<[doF32FTZ]> that passes the CvtRZI_FTZ mode, and an unguarded one
// that passes CvtRZI.
// NOTE(review): the i1 pattern compares the BITCONVERT_32_F2I result against 0
// with CmpEQ, i.e. the predicate is set when that value is zero. Confirm this
// polarity is what fp_to_sint to i1 is expected to produce.
2352 def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
2353 (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
2354 def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
2355 (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2356 def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
2357 (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
2358 def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
2359 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2360 def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
2361 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
2362 def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
2363 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2364 def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
2365 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
// f32 -> unsigned integer.
// Mirrors the signed group: i16/i32/i64 use CVT_u*_f32 with CvtRZI_FTZ when
// guarded by Requires<[doF32FTZ]> and CvtRZI otherwise.
// NOTE(review): the i1 pattern is the same bits-equal-zero (CmpEQ) test used
// for fp_to_sint; confirm the polarity is intended.
2368 def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
2369 (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
2370 def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
2371 (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2372 def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
2373 (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
2374 def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
2375 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2376 def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
2377 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
2378 def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
2379 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2380 def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
2381 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
// f64 -> signed integer.
// i16/i32/i64 results use CVT_s*_f64 with the CvtRZI mode; unlike the f32
// patterns there is no doF32FTZ-guarded variant here.
// NOTE(review): the i1 pattern tests the BITCONVERT_64_F2I result for equality
// with 0 (CmpEQ); confirm the polarity is intended.
2384 def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
2385 (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
2386 def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
2387 (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
2388 def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
2389 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
2390 def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
2391 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
// f64 -> unsigned integer.
// i16/i32/i64 results use CVT_u*_f64 with the CvtRZI mode.
// NOTE(review): the i1 pattern is the same bits-equal-zero (CmpEQ) test used
// for fp_to_sint from f64; confirm the polarity is intended.
2394 def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
2395 (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
2396 def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
2397 (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
2398 def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
2399 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
2400 def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
2401 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
// Extensions from i1, all implemented as a select between two immediates
// keyed on the predicate register.
// sext i1: select -1 / 0 with the signed SELP variants.
2404 def : Pat<(i16 (sext Int1Regs:$a)),
2405 (SELP_s16ii -1, 0, Int1Regs:$a)>;
2406 def : Pat<(i32 (sext Int1Regs:$a)),
2407 (SELP_s32ii -1, 0, Int1Regs:$a)>;
2408 def : Pat<(i64 (sext Int1Regs:$a)),
2409 (SELP_s64ii -1, 0, Int1Regs:$a)>;
// zext i1: select 1 / 0 with the unsigned SELP variants.
2412 def : Pat<(i16 (zext Int1Regs:$a)),
2413 (SELP_u16ii 1, 0, Int1Regs:$a)>;
2414 def : Pat<(i32 (zext Int1Regs:$a)),
2415 (SELP_u32ii 1, 0, Int1Regs:$a)>;
2416 def : Pat<(i64 (zext Int1Regs:$a)),
2417 (SELP_u64ii 1, 0, Int1Regs:$a)>;
// anyext i1: select -1 / 0 with the unsigned SELP variants.
2420 def : Pat<(i16 (anyext Int1Regs:$a)),
2421 (SELP_u16ii -1, 0, Int1Regs:$a)>;
2422 def : Pat<(i32 (anyext Int1Regs:$a)),
2423 (SELP_u32ii -1, 0, Int1Regs:$a)>;
2424 def : Pat<(i64 (anyext Int1Regs:$a)),
2425 (SELP_u64ii -1, 0, Int1Regs:$a)>;
// Extensions from i16 and i32. Every pattern passes CvtNONE as the mode.
// sext uses the signed CVT_s*_s* instructions; zext and anyext both use the
// unsigned CVT_u*_u* instructions.
// sext i16
2428 def : Pat<(i32 (sext Int16Regs:$a)),
2429 (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
2430 def : Pat<(i64 (sext Int16Regs:$a)),
2431 (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;
// zext i16
2434 def : Pat<(i32 (zext Int16Regs:$a)),
2435 (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
2436 def : Pat<(i64 (zext Int16Regs:$a)),
2437 (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
// anyext i16
2440 def : Pat<(i32 (anyext Int16Regs:$a)),
2441 (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
2442 def : Pat<(i64 (anyext Int16Regs:$a)),
2443 (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
// sext i32
2446 def : Pat<(i64 (sext Int32Regs:$a)),
2447 (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
// zext i32
2450 def : Pat<(i64 (zext Int32Regs:$a)),
2451 (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
// anyext i32
2454 def : Pat<(i64 (anyext Int32Regs:$a)),
2455 (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
// Integer truncation.
// Truncation to i32/i16 uses the unsigned CVT_u*_u* instructions with CvtNONE.
// Truncation to i1 keeps only bit 0: the source is ANDed with 1 and the result
// is compared for equality with 1 to produce the predicate.
// truncate i64
2459 def : Pat<(i32 (trunc Int64Regs:$a)),
2460 (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
2461 def : Pat<(i16 (trunc Int64Regs:$a)),
2462 (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
2463 def : Pat<(i1 (trunc Int64Regs:$a)),
2464 (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;
// truncate i32
2467 def : Pat<(i16 (trunc Int32Regs:$a)),
2468 (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
2469 def : Pat<(i1 (trunc Int32Regs:$a)),
2470 (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;
// truncate i16
2473 def : Pat<(i1 (trunc Int16Regs:$a)),
2474 (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
// sext_inreg: each (register width, in-register source width) pair maps to the
// CVT_INREG_s<reg>_s<src> instruction of the same shape.
2477 def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>;
2478 def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>;
2479 def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>;
2480 def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>;
2481 def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>;
2482 def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
2485 // Select instructions with 32-bit predicates
// The i32 condition is reduced to a predicate by testing its low bit
// ((pred & 1) == 1); that predicate then drives the register-register SELP
// matching the type of the selected values.
2486 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2487 (SELP_b16rr Int16Regs:$a, Int16Regs:$b,
2488 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2489 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2490 (SELP_b32rr Int32Regs:$a, Int32Regs:$b,
2491 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2492 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2493 (SELP_b64rr Int64Regs:$a, Int64Regs:$b,
2494 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2495 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2496 (SELP_f32rr Float32Regs:$a, Float32Regs:$b,
2497 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2498 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2499 (SELP_f64rr Float64Regs:$a, Float64Regs:$b,
2500 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2503 // pack a set of smaller int registers to a larger int register
// Each instruction is a single mov.b32/mov.b64 whose source is a
// brace-enclosed list of the narrower registers.
// V4I16toI64: four Int16Regs -> one Int64Regs.
2504 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2505 (ins Int16Regs:$s1, Int16Regs:$s2,
2506 Int16Regs:$s3, Int16Regs:$s4),
2507 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
// V2I16toI32: two Int16Regs -> one Int32Regs.
2509 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2510 (ins Int16Regs:$s1, Int16Regs:$s2),
2511 "mov.b32\t$d, {{$s1, $s2}};",
// V2I32toI64: two Int32Regs -> one Int64Regs.
2513 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2514 (ins Int32Regs:$s1, Int32Regs:$s2),
2515 "mov.b64\t$d, {{$s1, $s2}};",
// V2F32toF64: two Float32Regs -> one Float64Regs (same mov.b64 form).
2517 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2518 (ins Float32Regs:$s1, Float32Regs:$s2),
2519 "mov.b64\t$d, {{$s1, $s2}};",
2522 // unpack a larger int register to a set of smaller int registers
// Inverse of the pack instructions: a single mov.b32/mov.b64 whose
// destination is a brace-enclosed list of the narrower registers.
// I64toV4I16: one 64-bit source -> four Int16Regs.
2523 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2524 Int16Regs:$d3, Int16Regs:$d4),
2526 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
// I32toV2I16: one 32-bit source -> two Int16Regs.
2528 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2530 "mov.b32\t{{$d1, $d2}}, $s;",
// I64toV2I32: one 64-bit source -> two Int32Regs.
2532 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2534 "mov.b64\t{{$d1, $d2}}, $s;",
// F64toV2F32: one Float64Regs source -> two Float32Regs.
2536 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2537 (ins Float64Regs:$s),
2538 "mov.b64\t{{$d1, $d2}}, $s;",
2541 // Count leading zeros
// Both CLZr32 and CLZr64 write their result to an Int32Regs destination,
// regardless of the source width.
2542 def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2545 def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2549 // 32-bit has a direct PTX instruction
// ctlz and ctlz_zero_undef select the same instruction.
2550 def : Pat<(ctlz Int32Regs:$a),
2551 (CLZr32 Int32Regs:$a)>;
2552 def : Pat<(ctlz_zero_undef Int32Regs:$a),
2553 (CLZr32 Int32Regs:$a)>;
2555 // For 64-bit, the result in PTX is actually 32-bit so we zero-extend
2556 // to 64-bit to match the LLVM semantics
2557 def : Pat<(ctlz Int64Regs:$a),
2558 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2559 def : Pat<(ctlz_zero_undef Int64Regs:$a),
2560 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2562 // For 16-bit, we zero-extend to 32-bit, then trunc the result back
2563 // to 16-bits (ctlz of a 16-bit value is guaranteed to require less
2564 // than 16 bits to store). We also need to subtract 16 because the
2565 // high-order 16 zeros were counted.
2566 def : Pat<(ctlz Int16Regs:$a),
2567 (SUBi16ri (CVT_u16_u32 (CLZr32
2568 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2570 def : Pat<(ctlz_zero_undef Int16Regs:$a),
2571 (SUBi16ri (CVT_u16_u32 (CLZr32
2572 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
// Population count
// POPCr32/POPCr64 emit popc.b32/popc.b64; both write their result to an
// Int32Regs destination, regardless of the source width.
2576 def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2577 "popc.b32\t$d, $a;",
2579 def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2580 "popc.b64\t$d, $a;",
2583 // 32-bit has a direct PTX instruction
2584 def : Pat<(ctpop Int32Regs:$a),
2585 (POPCr32 Int32Regs:$a)>;
2587 // For 64-bit, the result in PTX is actually 32-bit so we zero-extend
2588 // to 64-bit to match the LLVM semantics
2589 def : Pat<(ctpop Int64Regs:$a),
2590 (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
2592 // For 16-bit, we zero-extend to 32-bit, then trunc the result back
2593 // to 16-bits (ctpop of a 16-bit value is guaranteed to require less
2594 // than 16 bits to store)
2595 def : Pat<(ctpop Int16Regs:$a),
2596 (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
// Floating-point width conversions. For each direction, the pattern guarded
// by Requires<[doF32FTZ]> passes the _FTZ conversion mode and the unguarded
// pattern passes the plain mode.
2599 // fround f64 -> f32
// Uses the CvtRN / CvtRN_FTZ mode.
2600 def : Pat<(f32 (fround Float64Regs:$a)),
2601 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
2602 def : Pat<(f32 (fround Float64Regs:$a)),
2603 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
2605 // fextend f32 -> f64
// Uses the CvtNONE / CvtNONE_FTZ mode.
2606 def : Pat<(f64 (fextend Float32Regs:$a)),
2607 (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
2608 def : Pat<(f64 (fextend Float32Regs:$a)),
2609 (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
// DAG node for NVPTXISD::RET_FLAG. It uses the SDTNone type profile, carries a
// chain, and accepts an optional incoming glue value.
2611 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2612 [SDNPHasChain, SDNPOptInGlue]>;
2614 //-----------------------------------
2616 //-----------------------------------
// Return and branch instructions, all marked isTerminator.
2618 let isTerminator=1 in {
// Return: emits "ret;" and is selected from the retflag node.
2619 let isReturn=1, isBarrier=1 in
2620 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
// CBranch: branch to $target when predicate $a is true ("@$a bra").
2623 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2624 "@$a bra \t$target;",
2625 [(brcond Int1Regs:$a, bb:$target)]>;
// CBranchOther: branch to $target when predicate $a is false ("@!$a bra").
2627 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2628 "@!$a bra \t$target;",
// GOTO: unconditional branch, emitted as "bra.uni".
2631 let isBranch=1, isBarrier=1 in
2632 def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2633 "bra.uni \t$target;",
// brcond on an i32 condition: first form a predicate with (a != 0), then use
// CBranch.
2637 def : Pat<(brcond Int32Regs:$a, bb:$target),
2638 (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
2640 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2641 // conditional branch if
2642 // the target block is the next block so that the code can fall through to the
2644 // The inversion is done by 'xor condition, 1', which will be translated to
2645 // (setne condition, -1).
2646 // Since ptx supports '@!pred bra target', we should use it.
2647 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2648 (CBranchOther Int1Regs:$a, bb:$target)>;
// Type profiles for the call-sequence markers: CALLSEQ_START constrains
// operand 0 to i32; CALLSEQ_END constrains operands 0 and 1 to i32.
2651 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2652 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2653 SDTCisVT<1, i32> ]>;
// DAG nodes bound to ISD::CALLSEQ_START / ISD::CALLSEQ_END.
2655 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2656 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2657 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2658 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
// NVPTXISD::CALL: no results and a single i32 operand (the call target).
2661 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2662 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2663 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
2664 def calltarget : Operand<i32>;
// CALL has an empty pattern list of its own; it is selected through the two
// patterns below, for global-address and external-symbol targets.
2666 def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2667 "call \t$dst, (1);", []>;
2670 def : Pat<(call tglobaladdr:$dst),
2671 (CALL tglobaladdr:$dst)>;
2672 def : Pat<(call texternalsym:$dst),
2673 (CALL texternalsym:$dst)>;
2675 // Pseudo instructions.
// Pseudo forwards all four arguments unchanged to NVPTXInst.
2676 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2677 : NVPTXInst<outs, ins, asmstr, pattern>;
2679 // @TODO: We use some tricks here to emit curly braces. Can we clean this up
2680 // a bit without TableGen modifications?
// Callseq_Start / Callseq_End are selected from callseq_start / callseq_end.
// Their asm strings print marker comments plus the opening and closing braces
// around the call sequence; Callseq_Start also declares the .b32 register
// temp_param_reg inside the opened scope.
2681 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2682 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2683 [(callseq_start timm:$amt)]>;
2684 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2685 "\n\t//{{\n\t}}// Callseq End $amt1",
2686 [(callseq_end timm:$amt1, timm:$amt2)]>;
// trapinst: an instruction with no outputs and no inputs.
2690 def trapinst : NVPTXInst<(outs), (ins),
2694 // Call prototype wrapper
// Type profile: no results and one operand, which must be an integer type.
2695 def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
// DAG node bound to NVPTXISD::CallPrototype; it carries a chain, incoming and
// outgoing glue, and is marked as having side effects.
2697 : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
2698 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// i32 operand printed through the printProtoIdent method.
2699 def ProtoIdent : Operand<i32> {
2700 let PrintMethod = "printProtoIdent";
// Instruction that prints only its $ident operand; selected from a
// CallPrototype node whose operand is an external symbol.
2703 : NVPTXInst<(outs), (ins ProtoIdent:$ident),
2704 "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
2708 include "NVPTXIntrinsics.td"
2711 //-----------------------------------
2713 //-----------------------------------
2714 // BSWAP is currently expanded. The following is a more efficient
2715 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2716 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2717 // unpack). sm_20 supports native 32-bit register, but not native 16-bit