Merging r258471:
[oota-llvm.git] / lib / Target / NVPTX / NVPTXIntrinsics.td
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
// Pattern leaf for a floating-point immediate (fpimm) whose value, read back
// as a C++ float, compares equal to 0.0f. The test is a plain ==, so a
// negative-zero immediate satisfies it as well.
10 def immFloat0 : PatLeaf<(fpimm), [{
11     float f = (float)N->getValueAPF().convertToFloat();
12     return (f==0.0f);
13 }]>;
14
// Pattern leaf for a floating-point immediate (fpimm) whose value, read back
// as a C++ float, compares equal to 1.0f.
15 def immFloat1 : PatLeaf<(fpimm), [{
16     float f = (float)N->getValueAPF().convertToFloat();
17     return (f==1.0f);
18 }]>;
19
// Pattern leaf for a floating-point immediate (fpimm) whose value, read back
// as a C++ double, compares equal to 0.0. The test is a plain ==, so a
// negative-zero immediate satisfies it as well.
20 def immDouble0 : PatLeaf<(fpimm), [{
21     double d = (double)N->getValueAPF().convertToDouble();
22     return (d==0.0);
23 }]>;
24
// Pattern leaf for a floating-point immediate (fpimm) whose value, read back
// as a C++ double, compares equal to 1.0.
25 def immDouble1 : PatLeaf<(fpimm), [{
26     double d = (double)N->getValueAPF().convertToDouble();
27     return (d==1.0);
28 }]>;
29
30
31
32 //-----------------------------------
33 // Synchronization Functions
34 //-----------------------------------
// int_cuda_syncthreads and int_nvvm_barrier0 are two intrinsics selected to
// the same operand-less instruction; both print the asm string "bar.sync \t0;".
35 def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
36                   "bar.sync \t0;",
37       [(int_cuda_syncthreads)]>;
38 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
39                   "bar.sync \t0;",
40       [(int_nvvm_barrier0)]>;
// Selects int_nvvm_barrier0_popc (Int32Regs $pred in, Int32Regs $dst out).
// The printed sequence sits inside a brace scope and does three things:
//   1. declares a scratch predicate register %p1,
//   2. sets %p1 to ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.popc.u32 with operands $dst, 0, %p1.
// The innermost !strconcat("}}", "") only appends an empty string.
// NOTE(review): the doubled braces presumably print as a single brace pair -
// confirm against the asm-writer escaping rules.
41 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
42   !strconcat("{{ \n\t",
43       !strconcat(".reg .pred \t%p1; \n\t",
44       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
45       !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
46         !strconcat("}}", ""))))),
47       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// Selects int_nvvm_barrier0_and (Int32Regs $pred in, Int32Regs $dst out).
// The printed sequence sits inside a brace scope and does four things:
//   1. declares two scratch predicate registers, %p1 and %p2,
//   2. sets %p1 to ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.and.pred with operands %p2, 0, %p1,
//   4. turns %p2 back into an integer: selp.u32 writes 1 or 0 into $dst.
// The innermost !strconcat("}}", "") only appends an empty string.
48 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
49   !strconcat("{{ \n\t",
50       !strconcat(".reg .pred \t%p1; \n\t",
51       !strconcat(".reg .pred \t%p2; \n\t",
52       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
53       !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
54       !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
55         !strconcat("}}", ""))))))),
56       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// Selects int_nvvm_barrier0_or (Int32Regs $pred in, Int32Regs $dst out).
// Same shape as INT_BARRIER0_AND, with bar.red.or.pred as the reduction:
//   1. declares two scratch predicate registers, %p1 and %p2,
//   2. sets %p1 to ($pred != 0) with setp.ne.u32,
//   3. issues bar.red.or.pred with operands %p2, 0, %p1,
//   4. turns %p2 back into an integer: selp.u32 writes 1 or 0 into $dst.
// The innermost !strconcat("}}", "") only appends an empty string.
57 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
58   !strconcat("{{ \n\t",
59       !strconcat(".reg .pred \t%p1; \n\t",
60       !strconcat(".reg .pred \t%p2; \n\t",
61       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
62       !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
63       !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
64         !strconcat("}}", ""))))))),
65       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
66
67
68 //-----------------------------------
69 // Explicit Memory Fence Functions
70 //-----------------------------------
// Instruction with no inputs and no outputs that is selected for a
// zero-argument intrinsic.
//   StrOp - the complete asm string to print
//   IntOP - the intrinsic whose bare occurrence the pattern matches
71 class MEMBAR<string StrOp, Intrinsic IntOP> :
72               NVPTXInst<(outs), (ins),
73             StrOp, [(IntOP)]>;
74
// One MEMBAR instance per fence intrinsic: the cta, gl and sys variants print
// membar.cta, membar.gl and membar.sys respectively.
75 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
76 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
77 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
78
79
80 //-----------------------------------
81 // Math Functions
82 //-----------------------------------
83
84 // Map min(1.0, max(0.0, x)) to sat(x)
85 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is
86 // NaN, max(0.0, min(x, 1.0)) evaluates to 1.0, whereas sat(x) evaluates
87 // to 0.
88 // Same story for fmax, fmin.
89
// Float32 forms of the saturate idiom. The four patterns enumerate both
// operand orders of the outer int_nvvm_fmin_f (constant 1.0) and of the inner
// int_nvvm_fmax_f (constant 0.0); every one selects CVT_f32_f32 on $a with
// the CvtSAT mode.
90 def : Pat<(int_nvvm_fmin_f immFloat1,
91             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
92           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
93 def : Pat<(int_nvvm_fmin_f immFloat1,
94             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
95           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
96 def : Pat<(int_nvvm_fmin_f
97             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
98           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
99 def : Pat<(int_nvvm_fmin_f
100             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
101           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
102
// Float64 forms of the saturate idiom. The four patterns enumerate both
// operand orders of the outer int_nvvm_fmin_d (constant 1.0) and of the inner
// int_nvvm_fmax_d (constant 0.0); every one selects CVT_f64_f64 on $a with
// the CvtSAT mode.
103 def : Pat<(int_nvvm_fmin_d immDouble1,
104             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
105           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
106 def : Pat<(int_nvvm_fmin_d immDouble1,
107             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
108           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
109 def : Pat<(int_nvvm_fmin_d
110             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
111           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
112 def : Pat<(int_nvvm_fmin_d
113             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
114           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
115
116
117 // We need a full string for OpcStr here because we need to deal with case like
118 // INT_PTX_RECIP.
// One-source intrinsic instruction.
//   OpcStr          - complete asm string, operands included
//   target_regclass - register class of the result operand, $dst
//   src_regclass    - register class of the source operand, $src0
//   IntOP           - intrinsic matched by the selection pattern
119 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
120   NVPTXRegClass src_regclass, Intrinsic IntOP>
121             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
122             OpcStr,
123         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
124
125 // We need a full string for OpcStr here because we need to deal with the case
126 // like INT_PTX_NATIVE_POWR_F.
// Two-source intrinsic instruction.
//   OpcStr      - complete asm string, operands included
//   t_regclass  - register class of the result operand, $dst
//   s0_regclass - register class of the first source, $src0
//   s1_regclass - register class of the second source, $src1
//   IntOP       - intrinsic matched by the selection pattern
127 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
128   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
129             : NVPTXInst<(outs t_regclass:$dst),
130               (ins s0_regclass:$src0, s1_regclass:$src1),
131             OpcStr,
132         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
133
// Three-source intrinsic instruction.
//   OpcStr      - complete asm string, operands included
//   t_regclass  - register class of the result operand, $dst
//   s0_regclass - register class of the first source, $src0
//   s1_regclass - register class of the second source, $src1
//   s2_regclass - register class of the third source, $src2
//   IntOP       - intrinsic matched by the selection pattern
134 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
135   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
136   NVPTXRegClass s2_regclass, Intrinsic IntOP>
137             : NVPTXInst<(outs t_regclass:$dst),
138               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
139             OpcStr,
140         [(set t_regclass:$dst,
141           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
142
143 // MISC
144 // clz, popc and prmt intrinsics. The 64-bit clz/popc defs read an Int64Regs
145 // source but, like the 32-bit ones, write an Int32Regs result.
146
147 def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
148   int_nvvm_clz_i>;
149 def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
150   int_nvvm_clz_ll>;
151
152 def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
153   int_nvvm_popc_i>;
154 def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
155   int_nvvm_popc_ll>;
156
157 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
158   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
159
160 // Min Max
161 // Integer (s32/u32/s64/u64) and floating-point (f32, ftz.f32, f64) min/max
162 // intrinsics; each def prints the PTX instruction given in its asm string.
163
164 def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
165   Int32Regs, Int32Regs, int_nvvm_min_i>;
166 def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
167   Int32Regs, Int32Regs, int_nvvm_min_ui>;
168
169 def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
170   Int64Regs, Int64Regs, int_nvvm_min_ll>;
171 def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
172   Int64Regs, Int64Regs, int_nvvm_min_ull>;
173
174 def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
175   Int32Regs, Int32Regs, int_nvvm_max_i>;
176 def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
177   Int32Regs, Int32Regs, int_nvvm_max_ui>;
178
179 def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
180   Int64Regs, Int64Regs, int_nvvm_max_ll>;
181 def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
182   Int64Regs, Int64Regs, int_nvvm_max_ull>;
183
184 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
185   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
186 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
187   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
188
189 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
190   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
191 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
192   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
193
194 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
195   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
196 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
197   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
198
199 // Multiplication
200 // mul.hi on s32/u32/s64/u64, f32 mul in the rn/rz/rm/rp modes with and
201 // without .ftz, f64 mul in the same four modes, and mul24.lo on s32/u32.
202
203 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
204   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
205 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
206   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
207
208 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
209   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
210 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
211   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
212
213 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
214   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
215 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
216   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
217 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
218   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
219 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
220   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
221 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
222   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
223 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
224   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
225 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
226   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
227 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
228   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
229
230 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
231   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
232 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
233   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
234 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
235   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
236 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
237   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
238
239 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
240   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
241 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
242   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
243
244 // Div
245 // f32 division in approx and rn/rz/rm/rp modes, each with and without
246 // .ftz, followed by f64 division in the rn/rz/rm/rp modes.
247
248 def INT_NVVM_DIV_APPROX_FTZ_F
249   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
250     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
251 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
252   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
253
254 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
255   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
256 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
257   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
258 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
259   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
260 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
261   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
262 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
263   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
264 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
265   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
266 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
267   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
268 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
269   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
270
271 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
272   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
273 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
274   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
275 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
276   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
277 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
278   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
279
280 // Brev
281 // brev.b32 / brev.b64: source and result share the same register class
282 // (Int32Regs for the 32-bit form, Int64Regs for the 64-bit form).
283
284 def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
285   int_nvvm_brev32>;
286 def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
287   int_nvvm_brev64>;
288
289 //
290 // Sad
291 //
292
293 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
294   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
295 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
296   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
297
298 // Floor  Ceil
299 // No dedicated instructions: floor selects a same-type CVT with the CvtRMI
300 // mode and ceil one with CvtRPI (the _FTZ modes serve the ftz intrinsics).
301
302 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
303           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
304 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
305           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
306 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
307           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
308
309 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
310           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
311 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
312           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
313 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
314           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
315
316 // Abs
317 // abs on s32 and s64 integers, on f32 with and without .ftz, and on f64;
318 // result and source always share a register class.
319
320 def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
321   int_nvvm_abs_i>;
322 def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
323   int_nvvm_abs_ll>;
324
325 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
326   Float32Regs, int_nvvm_fabs_ftz_f>;
327 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
328   Float32Regs, int_nvvm_fabs_f>;
329
330 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
331   Float64Regs, int_nvvm_fabs_d>;
332
333 // Round
334 // The round intrinsics select a same-type CVT with the CvtRNI mode
335 // (CvtRNI_FTZ for the ftz variant).
336
337 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
338           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
339 def : Pat<(int_nvvm_round_f Float32Regs:$a),
340           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
341 def : Pat<(int_nvvm_round_d Float64Regs:$a),
342           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
343
344 // Trunc
345 // The trunc intrinsics select a same-type CVT with the CvtRZI mode
346 // (CvtRZI_FTZ for the ftz variant).
347
348 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
349           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
350 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
351           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
352 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
353           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
354
355 // Saturate
356 // The saturate intrinsics select a same-type CVT with the CvtSAT mode
357 // (CvtSAT_FTZ for the ftz variant).
358
359 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
360           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
361 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
362           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
363 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
364           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
365
366 // Exp2  Log2
367 // ex2.approx and lg2.approx, each in ftz.f32, f32 and f64 form; all are
368 // single-source F_MATH_1 instances.
369
370 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
371   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
372 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
373   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
374 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
375   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
376
377 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
378   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
379 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
380   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
381 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
382   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
383
384 // Sin  Cos
385 // sin.approx and cos.approx, f32 only, each with and without .ftz; no
386 // f64 variants are defined in this section.
387
388 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
389   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
390 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
391   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
392
393 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
394   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
395 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
396   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
397
398 // Fma
399 // Three-source fma in the rn/rz/rm/rp modes: f32 with and without .ftz,
400 // then f64. All defs are F_MATH_3 instances with a uniform register class.
401
402 def INT_NVVM_FMA_RN_FTZ_F
403   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
404     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
405 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
406   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
407 def INT_NVVM_FMA_RZ_FTZ_F
408   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
409     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
410 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
411   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
412 def INT_NVVM_FMA_RM_FTZ_F
413   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
414     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
415 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
416   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
417 def INT_NVVM_FMA_RP_FTZ_F
418   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
419     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
420 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
421   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
422
423 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
424   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
425 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
426   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
427 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
428   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
429 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
430   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
431
432 // Rcp
433 // rcp in the rn/rz/rm/rp modes (f32 with and without .ftz, then f64),
434 // plus a single approximate form, rcp.approx.ftz.f64, at the end.
435
436 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
437   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
438 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
439   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
440 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
441   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
442 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
443   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
444 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
445   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
446 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
447   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
448 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
449   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
450 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
451   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
452
453 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
454   Float64Regs, int_nvvm_rcp_rn_d>;
455 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
456   Float64Regs, int_nvvm_rcp_rz_d>;
457 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
458   Float64Regs, int_nvvm_rcp_rm_d>;
459 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
460   Float64Regs, int_nvvm_rcp_rp_d>;
461
462 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
463   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
464
465 // Sqrt
466 // sqrt in the rn/rz/rm/rp and approx modes for f32 (with and without
467 // .ftz), then the rn/rz/rm/rp modes for f64 (no approx f64 form here).
468
469 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
470   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
471 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
472   Float32Regs, int_nvvm_sqrt_rn_f>;
473 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
474   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
475 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
476   Float32Regs, int_nvvm_sqrt_rz_f>;
477 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
478   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
479 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
480   Float32Regs, int_nvvm_sqrt_rm_f>;
481 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
482   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
483 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
484   Float32Regs, int_nvvm_sqrt_rp_f>;
485 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
486   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
487 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
488   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
489
490 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
491   Float64Regs, int_nvvm_sqrt_rn_d>;
492 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
493   Float64Regs, int_nvvm_sqrt_rz_d>;
494 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
495   Float64Regs, int_nvvm_sqrt_rm_d>;
496 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
497   Float64Regs, int_nvvm_sqrt_rp_d>;
498
499 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic has four candidate patterns that
// differ only in their Requires<> predicate list:
//   doF32FTZ + do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN            -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                 -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)           -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): which pattern wins when several predicate lists hold at once
// is decided by the instruction selector's ordering rules, which are not
// visible here - confirm that the most specific pattern is preferred.
500 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
501           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
502 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
503           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
504 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
505           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
506 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
507           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
508
509 // Rsqrt
510 // rsqrt.approx only: ftz.f32, f32 and f64 forms, all single-source
511 // F_MATH_1 instances.
512
513 def INT_NVVM_RSQRT_APPROX_FTZ_F
514   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
515     int_nvvm_rsqrt_approx_ftz_f>;
516 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
517   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
518 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
519   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
520
521 // Add
522 // Floating-point add in the rn/rz/rm/rp modes: f32 with and without
523 // .ftz, then f64. All defs are two-source F_MATH_2 instances.
524
525 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
526   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
527 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
528   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
529 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
530   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
531 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
532   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
533 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
534   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
535 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
536   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
537 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
538   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
539 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
540   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
541
542 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
543   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
544 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
545   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
546 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
547   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
548 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
549   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
550
551 // Convert
552 // Conversion intrinsics select the CVT_<dst>_<src> instructions; the Cvt*
553 // mode operand carries the rounding/ftz variant. First group: f64 -> f32.
554
555 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
556           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
557 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
558           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
559 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
560           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
561 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
562           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
563 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
564           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
565 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
566           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
567 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
568           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
569 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
570           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
571
// f64 -> 32-bit integer: d2i selects CVT_s32_f64 and d2ui selects CVT_u32_f64.
// The float-to-integer patterns use the integer-rounding modes
// (CvtRNI/CvtRZI/CvtRMI/CvtRPI) rather than CvtRN/CvtRZ/CvtRM/CvtRP.
572 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
573           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
574 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
575           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
576 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
577           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
578 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
579           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
580
581 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
582           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
583 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
584           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
585 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
586           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
587 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
588           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
589
// 32-bit integer -> f64: i2d selects CVT_f64_s32 and ui2d selects CVT_f64_u32,
// with the CvtRN/CvtRZ/CvtRM/CvtRP modes.
590 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
591           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
592 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
593           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
594 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
595           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
596 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
597           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
598
599 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
600           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
601 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
602           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
603 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
604           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
605 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
606           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
607
// f32 -> 32-bit integer: f2i selects CVT_s32_f32 and f2ui selects CVT_u32_f32.
// Each rounding mode has a plain and an _ftz intrinsic, paired with the
// CvtR?I and CvtR?I_FTZ modes respectively.
608 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
609           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
610 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
611           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
612 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
613           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
614 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
615           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
616 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
617           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
618 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
619           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
620 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
621           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
622 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
623           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
624
625 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
626           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
627 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
628           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
629 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
630           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
631 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
632           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
633 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
634           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
635 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
636           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
637 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
638           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
639 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
640           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
641
// 32-bit integer -> f32: i2f selects CVT_f32_s32 and ui2f selects CVT_f32_u32,
// with the CvtRN/CvtRZ/CvtRM/CvtRP modes.
642 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
643           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
644 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
645           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
646 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
647           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
648 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
649           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
650
651 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
652           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
653 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
654           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
655 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
656           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
657 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
658           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
659
// Selects int_nvvm_lohi_i2d: two Int32Regs sources are packed into one
// Float64Regs result by a mov.b64 whose source is the brace-enclosed pair
// $src0, $src1.
// NOTE(review): presumably $src0 supplies the low word and $src1 the high
// word, as the intrinsic name suggests - confirm against the PTX mov spec.
660 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
661   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
662
// Selects int_nvvm_d2i_lo (Float64Regs in, Int32Regs out). The printed
// sequence opens a brace scope, declares a scratch .b32 register %temp, and
// unpacks $src0 with mov.b64 into the pair {$dst, %temp}: $dst takes the
// first element and %temp absorbs the second, which is discarded.
// NOTE(review): the first element is presumably the low 32 bits - confirm
// against the PTX mov spec.
663 def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
664                        !strconcat(".reg .b32 %temp; \n\t",
665              !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
666                "}}"))),
667              Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// Selects int_nvvm_d2i_hi (Float64Regs in, Int32Regs out). Mirror image of
// INT_NVVM_D2I_LO: the mov.b64 destination pair is {%temp, $dst}, so $dst
// takes the second element and the scratch register %temp absorbs the first.
// NOTE(review): the second element is presumably the high 32 bits - confirm
// against the PTX mov spec.
668 def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
669                        !strconcat(".reg .b32 %temp; \n\t",
670                          !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
671                            "}}"))),
672              Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
673
// Select int_nvvm_f2ll_* (Float32Regs source) to CVT_s64_f32.  Every rounding
// suffix (rn, rz, rm, rp) has a plain and an _ftz intrinsic; they map to
// CvtR?I and CvtR?I_FTZ respectively.
674 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
675           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
676 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
677           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
678 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
679           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
680 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
681           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
682 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
683           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
684 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
685           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
686 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
687           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
688 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
689           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
690
// Select int_nvvm_f2ull_* (Float32Regs source) to CVT_u64_f32, using the same
// suffix-to-mode mapping as the f2ll patterns: plain -> CvtR?I,
// _ftz -> CvtR?I_FTZ.
691 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
692           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
693 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
694           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
695 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
696           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
697 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
698           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
699 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
700           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
701 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
702           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
703 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
704           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
705 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
706           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
707
// Select int_nvvm_d2ll_{rn,rz,rm,rp} (Float64Regs source) to CVT_s64_f64 with
// the matching CvtR?I mode operand.  No _ftz variants are defined here.
708 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
709           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
710 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
711           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
712 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
713           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
714 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
715           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
716
// Select int_nvvm_d2ull_{rn,rz,rm,rp} (Float64Regs source) to CVT_u64_f64
// with the matching CvtR?I mode operand.
717 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
718           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
719 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
720           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
721 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
722           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
723 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
724           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
725
// Select int_nvvm_ll2f_{rn,rz,rm,rp} (Int64Regs source) to CVT_f32_s64 with
// the matching CvtRN/CvtRZ/CvtRM/CvtRP mode operand.
726 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
727           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
728 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
729           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
730 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
731           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
732 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
733           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
734
// Select int_nvvm_ull2f_{rn,rz,rm,rp} (Int64Regs source) to CVT_f32_u64 with
// the matching CvtRN/CvtRZ/CvtRM/CvtRP mode operand.
735 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
736           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
737 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
738           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
739 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
740           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
741 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
742           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
743
// Select int_nvvm_ll2d_{rn,rz,rm,rp} (Int64Regs source) to CVT_f64_s64 with
// the matching CvtRN/CvtRZ/CvtRM/CvtRP mode operand.
744 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
745           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
746 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
747           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
748 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
749           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
750 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
751           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
752
// Select int_nvvm_ull2d_{rn,rz,rm,rp} (Int64Regs source) to CVT_f64_u64 with
// the matching CvtRN/CvtRZ/CvtRM/CvtRP mode operand.
753 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
754           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
755 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
756           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
757 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
758           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
759 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
760           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
761
762
763 // FIXME: Ideally, we could use these patterns instead of the scope-creating
764 // patterns, but ptxas does not like these since .s16 is not compatible with
765 // .f16.  The solution is to use .bXX for all integer register types, but we
766 // are not there yet.
767 //def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
768 //          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
769 //def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
770 //          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
771 //
772 //def : Pat<(int_nvvm_h2f Int16Regs:$a),
773 //          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
774
// int_nvvm_f2h_rn_ftz / int_nvvm_f2h_rn: Float32Regs -> Int16Regs half
// conversion.  Each expands to a brace-delimited scope that declares a
// scratch .b16 register %temp, converts $src0 into it with
// cvt.rn[.ftz].f16.f32, and then copies %temp to $dst with mov.b16.
// These are the scope-creating forms that the FIXME above refers to.
// Note: the mov.b16 line ends in "\n" only (no "\t" before the closing "}}"),
// unlike the other scoped sequences in this file.
775 def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
776                                    !strconcat(".reg .b16 %temp;\n\t",
777            !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
778            !strconcat("mov.b16 \t$dst, %temp;\n",
779              "}}")))),
780                                    Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
781 def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
782                                    !strconcat(".reg .b16 %temp;\n\t",
783            !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
784            !strconcat("mov.b16 \t$dst, %temp;\n",
785              "}}")))),
786            Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
787
// int_nvvm_h2f: Int16Regs -> Float32Regs half-to-float conversion.  Expands
// to a brace-delimited scope that copies $src0 into a scratch .b16 register
// %temp with mov.b16 and then converts %temp to $dst with cvt.f32.f16.
788 def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
789                             !strconcat(".reg .b16 %temp;\n\t",
790           !strconcat("mov.b16 \t%temp, $src0;\n\t",
791           !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
792             "}}")))),
793           Float32Regs, Int16Regs, int_nvvm_h2f>;
794
// Generic f16 <-> f32 conversion nodes (as opposed to the NVVM intrinsics
// above).  f16_to_fp selects CVT_f32_f16 with CvtNONE.  fp_to_f16 has two
// patterns over the same source dag: one guarded by Requires<[doF32FTZ]>
// that uses CvtRN_FTZ, and an unguarded one that uses CvtRN.
795 def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
796           (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
797 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
798           (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
799 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
800           (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
801
// Generic f16 <-> f64 conversion nodes: f16_to_fp selects CVT_f64_f16 with
// CvtNONE, fp_to_f16 selects CVT_f16_f64 with CvtRN.  There is no FTZ variant
// for the f64 source here.
802 def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
803           (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
804 def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
805           (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
806
807 //
808 // Bitcast
809 //
810
// 32-bit bitcast intrinsics: both directions emit the same "mov.b32" and
// differ only in which of Int32Regs / Float32Regs is the result class and
// which is the source class.
811 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
812   Float32Regs, int_nvvm_bitcast_f2i>;
813 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
814   Int32Regs, int_nvvm_bitcast_i2f>;
815
// 64-bit bitcast intrinsics: both directions emit the same "mov.b64" and
// differ only in which of Int64Regs / Float64Regs is the result class and
// which is the source class.
816 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
817   Int64Regs, int_nvvm_bitcast_ll2d>;
818 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
819   Float64Regs, int_nvvm_bitcast_d2ll>;
820
821 //-----------------------------------
822 // Atomic Functions
823 //-----------------------------------
824
// PatFrag wrapper for atomics: forwards `ops`/`frag` unchanged and adds a
// predicate that returns ChkMemSDNodeAddressSpace(N,
// llvm::ADDRESS_SPACE_GLOBAL), so the fragment only matches when that helper
// (defined outside this file chunk) accepts the node for the global space.
825 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
826  : PatFrag<ops, frag, [{
827    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
828 }]>;
// Same shape as ATOMIC_GLOBAL_CHK, but the predicate checks the node against
// llvm::ADDRESS_SPACE_SHARED.
829 class ATOMIC_SHARED_CHK <dag ops, dag frag>
830  : PatFrag<ops, frag, [{
831    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
832 }]>;
// Same shape as ATOMIC_GLOBAL_CHK, but the predicate checks the node against
// llvm::ADDRESS_SPACE_GENERIC.
833 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
834  : PatFrag<ops, frag, [{
835    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
836 }]>;
837
// Two-operand atomic (address + one value) for a single pointer register
// class.  Produces two instructions with identical asm text:
//   reg - $b is a register of `regclass`
//   imm - $b is an immediate of operand type `IMMType`, matched via `IMM`
// Parameters:
//   ptrclass - register class of the $addr operand
//   regclass - register class of $dst (and of $b in the reg form)
//   SpaceStr, OpcStr, TypeStr - appended to "atom" in that order to form the
//              mnemonic ("atom" SpaceStr OpcStr TypeStr)
//   IntOp    - PatFrag matched by the selection pattern
//   IMMType  - operand type used for $b in the imm form
//   IMM      - SDNode used to match $b in the imm form's pattern
//   Pred     - predicate placed in Requires<[...]> on both instructions
838 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
839   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
840   Operand IMMType, SDNode IMM, Predicate Pred> {
841   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
842                !strconcat("atom",
843          !strconcat(SpaceStr,
844          !strconcat(OpcStr,
845          !strconcat(TypeStr,
846          !strconcat(" \t$dst, [$addr], $b;", ""))))),
847          [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
848   Requires<[Pred]>;
849   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
850                !strconcat("atom",
851          !strconcat(SpaceStr,
852          !strconcat(OpcStr,
853          !strconcat(TypeStr,
854          !strconcat(" \t$dst, [$addr], $b;", ""))))),
855          [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
856   Requires<[Pred]>;
857 }
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class (sub-names p32 and p64).  All other
// parameters are forwarded unchanged.
858 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
859   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
860   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
861     IntOp, IMMType, IMM, Pred>;
862   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
863     IntOp, IMMType, IMM, Pred>;
864 }
865
866 // has 2 operands, neg the second one
// Two-operand atomic that negates its value operand before issuing the
// atomic.  The emitted text is a brace-delimited scope that:
//   1. declares a scratch register `temp` of type ".s" TypeStr,
//   2. computes temp = neg.s<TypeStr> $b,
//   3. issues "atom" SpaceStr OpcStr ".u" TypeStr with temp as the value.
// Because ".s" / ".u" are prepended here, TypeStr must be the bare width
// (the instantiations in this file pass "32" or "64").
// Only a register form ("reg") is defined; IMMType is accepted for signature
// symmetry but is not referenced in this body.
867 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
868   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
869   Operand IMMType, Predicate Pred> {
870   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
871     !strconcat("{{ \n\t",
872          !strconcat(".reg \t.s",
873          !strconcat(TypeStr,
874          !strconcat(" temp; \n\t",
875          !strconcat("neg.s",
876          !strconcat(TypeStr,
877          !strconcat(" \ttemp, $b; \n\t",
878                !strconcat("atom",
879          !strconcat(SpaceStr,
880          !strconcat(OpcStr,
881          !strconcat(".u",
882          !strconcat(TypeStr,
883          !strconcat(" \t$dst, [$addr], temp; \n\t",
884            !strconcat("}}", "")))))))))))))),
885          [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
886   Requires<[Pred]>;
887 }
// Instantiates F_ATOMIC_2_NEG_imp for both pointer register classes
// (Int32Regs as p32, Int64Regs as p64), forwarding every other parameter.
888 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
889   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
890   Predicate Pred> {
891  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
892    IntOp, IMMType, Pred> ;
893  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
894    IntOp, IMMType, Pred> ;
895 }
896
897 // has 3 operands
// Three-operand atomic (address + two values $b, $c) for a single pointer
// register class.  All four instructions share the asm text
// "atom" SpaceStr OpcStr TypeStr " \t$dst, [$addr], $b, $c;" and differ only
// in which value operands are immediates:
//   reg  - $b and $c are registers of `regclass`
//   imm1 - $b is IMMType, $c is a register
//   imm2 - $b is a register, $c is IMMType
//   imm3 - $b and $c are both IMMType
// Unlike F_ATOMIC_2_imp there is no SDNode parameter: the immediate forms
// match the `imm` node directly in their patterns.
898 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
899   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
900   Operand IMMType, Predicate Pred> {
901   def reg : NVPTXInst<(outs regclass:$dst),
902     (ins ptrclass:$addr, regclass:$b, regclass:$c),
903                !strconcat("atom",
904          !strconcat(SpaceStr,
905          !strconcat(OpcStr,
906          !strconcat(TypeStr,
907          !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
908          [(set regclass:$dst,
909            (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
910          Requires<[Pred]>;
911   def imm1 : NVPTXInst<(outs regclass:$dst),
912     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
913                !strconcat("atom",
914          !strconcat(SpaceStr,
915          !strconcat(OpcStr,
916          !strconcat(TypeStr,
917          !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
918          [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
919   Requires<[Pred]>;
920   def imm2 : NVPTXInst<(outs regclass:$dst),
921     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
922                !strconcat("atom",
923          !strconcat(SpaceStr,
924          !strconcat(OpcStr,
925          !strconcat(TypeStr,
926          !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
927          [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
928   Requires<[Pred]>;
929   def imm3 : NVPTXInst<(outs regclass:$dst),
930     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
931                !strconcat("atom",
932          !strconcat(SpaceStr,
933          !strconcat(OpcStr,
934          !strconcat(TypeStr,
935          !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
936          [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
937   Requires<[Pred]>;
938 }
// Instantiates F_ATOMIC_3_imp for both pointer register classes (Int32Regs
// as p32, Int64Regs as p64), forwarding every other parameter.
939 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
940   string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
941   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
942     IntOp, IMMType, Pred>;
943   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
944     IntOp, IMMType, Pred>;
945 }
946
947 // atom_add
948
// Address-space-qualified fragments for atomic add.  Suffixes: _g wraps the
// node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK, _gen in
// ATOMIC_GENERIC_CHK.  The 32- and 64-bit integer fragments wrap the generic
// atomic_load_add_32 / atomic_load_add_64 nodes; the f32 fragments wrap the
// int_nvvm_atomic_load_add_f32 intrinsic instead.
949 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
950   (atomic_load_add_32 node:$a, node:$b)>;
951 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
952   (atomic_load_add_32 node:$a, node:$b)>;
953 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
954   (atomic_load_add_32 node:$a, node:$b)>;
955 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
956   (atomic_load_add_64 node:$a, node:$b)>;
957 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
958   (atomic_load_add_64 node:$a, node:$b)>;
959 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
960   (atomic_load_add_64 node:$a, node:$b)>;
961 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
962   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
963 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
964   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
965 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
966   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
967
// 32-bit integer atomic add ("atom[.space].add.u32").  One F_ATOMIC_2
// instantiation per address-space fragment, each gated by its own predicate.
// The _GEN variant emits no space qualifier (SpaceStr is ""); the
// _GEN_..._USE_G variant matches the same generic-space fragment but emits
// the ".global" form and is gated by useAtomRedG32forGen32.
968 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
969   atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
970 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
971   atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
972 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
973   atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
974 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
975   ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
976
// 64-bit integer atomic add ("atom[.space].add.u64"): same four variants as
// the 32-bit group, using Int64Regs, i64imm and the 64-bit predicates.
977 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
978   atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
979 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
980   atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
981 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
982   atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
983 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
984   ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
985
// f32 atomic add ("atom[.space].add.f32").  Uses Float32Regs, f32imm and the
// fpimm node for the immediate form.  All three address-space variants share
// the single hasAtomAddF32 predicate, and there is no _USE_G fallback variant.
986 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
987   atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
988 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
989   atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
990 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
991   atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
992
993 // atom_sub
994
// Address-space-qualified fragments for atomic subtract: atomic_load_sub_32 /
// atomic_load_sub_64 wrapped in the global (_g), shared (_s) and generic
// (_gen) address-space checks.
995 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
996   (atomic_load_sub_32 node:$a, node:$b)>;
997 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
998   (atomic_load_sub_32 node:$a, node:$b)>;
999 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1000   (atomic_load_sub_32 node:$a, node:$b)>;
1001 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1002   (atomic_load_sub_64 node:$a, node:$b)>;
1003 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1004   (atomic_load_sub_64 node:$a, node:$b)>;
1005 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1006   (atomic_load_sub_64 node:$a, node:$b)>;
1007
// Atomic subtract is emitted through F_ATOMIC_2_NEG with OpcStr ".add": the
// multiclass negates the value operand and then issues an atomic add.
// TypeStr is passed as the bare width ("32" / "64") because F_ATOMIC_2_NEG_imp
// prepends ".s" / ".u" itself.  Variants cover global, shared, generic, and
// generic-lowered-to-".global" (_USE_G) for both widths.
1008 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1009   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
1010 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1011   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
1012 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1013   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
1014 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1015   ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
1016 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1017   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
1018 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1019   atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
1020 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1021   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
1022 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1023   ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
1024
1025 // atom_swap
1026
// Address-space-qualified fragments for atomic exchange: atomic_swap_32 /
// atomic_swap_64 wrapped in the global (_g), shared (_s) and generic (_gen)
// address-space checks.
1027 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1028   (atomic_swap_32 node:$a, node:$b)>;
1029 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1030   (atomic_swap_32 node:$a, node:$b)>;
1031 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1032   (atomic_swap_32 node:$a, node:$b)>;
1033 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1034   (atomic_swap_64 node:$a, node:$b)>;
1035 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1036   (atomic_swap_64 node:$a, node:$b)>;
1037 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1038   (atomic_swap_64 node:$a, node:$b)>;
1039
// Atomic exchange ("atom[.space].exch.b32" / ".b64").  Uses the untyped
// ".b32"/".b64" type strings.  Variants: global, shared, generic (empty
// SpaceStr) and generic-lowered-to-".global" (_USE_G), for both widths.
1040 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1041   atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
1042 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1043   atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
1044 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1045   atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
1046 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1047   ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1048 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1049   atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
1050 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1051   atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
1052 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1053   atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
1054 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1055   ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1056
1057 // atom_max
1058
// Address-space-qualified fragments for atomic max.  atomic_load_max_* and
// atomic_load_umax_* are kept as separate fragment families (each in 32- and
// 64-bit widths) so the instantiations below can pick different type strings
// for them.  Suffixes _g / _s / _gen select the global / shared / generic
// address-space check.
1059 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1060   , (atomic_load_max_32 node:$a, node:$b)>;
1061 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1062   (atomic_load_max_32 node:$a, node:$b)>;
1063 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1064   (atomic_load_max_32 node:$a, node:$b)>;
1065 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1066   , (atomic_load_max_64 node:$a, node:$b)>;
1067 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1068   (atomic_load_max_64 node:$a, node:$b)>;
1069 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1070   (atomic_load_max_64 node:$a, node:$b)>;
1071 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1072   (atomic_load_umax_32 node:$a, node:$b)>;
1073 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1074   (atomic_load_umax_32 node:$a, node:$b)>;
1075 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1076   (atomic_load_umax_32 node:$a, node:$b)>;
1077 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1078   (atomic_load_umax_64 node:$a, node:$b)>;
1079 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1080   (atomic_load_umax_64 node:$a, node:$b)>;
1081 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1082   (atomic_load_umax_64 node:$a, node:$b)>;
1083
// Atomic max ("atom[.space].max.<type>").  The LOAD_MAX instantiations use
// the atomic_load_max_* fragments with ".s32"/".s64"; the LOAD_UMAX ones use
// atomic_load_umax_* with ".u32"/".u64".  Each family has global, shared,
// generic (empty SpaceStr) and generic-lowered-to-".global" (_USE_G)
// variants in both widths.
1084 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1085   ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
1086 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1087   ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
1088 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1089   atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
1090 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1091   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1092 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1093   ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
1094 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1095   ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
1096 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1097   atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
1098 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1099   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1100 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1101   ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
1102 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1103   ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
1104 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1105   atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
1106 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1107   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1108 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1109   ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
1110 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1111   ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
1112 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1113   atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
1114 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1115   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1116
1117 // atom_min
1118
// Address-space-qualified fragments for atomic min.  atomic_load_min_* and
// atomic_load_umin_* are separate fragment families (each in 32- and 64-bit
// widths); suffixes _g / _s / _gen select the global / shared / generic
// address-space check.
1119 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1120   (atomic_load_min_32 node:$a, node:$b)>;
1121 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1122   (atomic_load_min_32 node:$a, node:$b)>;
1123 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1124   (atomic_load_min_32 node:$a, node:$b)>;
1125 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1126   (atomic_load_min_64 node:$a, node:$b)>;
1127 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1128   (atomic_load_min_64 node:$a, node:$b)>;
1129 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1130   (atomic_load_min_64 node:$a, node:$b)>;
1131 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1132   (atomic_load_umin_32 node:$a, node:$b)>;
1133 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1134   (atomic_load_umin_32 node:$a, node:$b)>;
1135 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1136   (atomic_load_umin_32 node:$a, node:$b)>;
1137 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1138   (atomic_load_umin_64 node:$a, node:$b)>;
1139 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1140   (atomic_load_umin_64 node:$a, node:$b)>;
1141 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1142   (atomic_load_umin_64 node:$a, node:$b)>;
1143
// Atomic min ("atom[.space].min.<type>").  The LOAD_MIN instantiations use
// the atomic_load_min_* fragments with ".s32"/".s64"; the LOAD_UMIN ones use
// atomic_load_umin_* with ".u32"/".u64".  Each family has global, shared,
// generic (empty SpaceStr) and generic-lowered-to-".global" (_USE_G)
// variants in both widths.
1144 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1145   ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
1146 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1147   ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
1148 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1149   atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
1150 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1151   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1152 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1153   ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
1154 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1155   ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
1156 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1157   atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
1158 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1159   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1160 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1161   ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
1162 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1163   ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
1164 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1165   atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
1166 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1167   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1168 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1169   ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
1170 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1171   ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
1172 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1173   atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
1174 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1175   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1176
1177 // atom_inc  atom_dec
1178
// Address-space-qualified fragments for atomic inc / dec.  These wrap the
// int_nvvm_atomic_load_inc_32 and int_nvvm_atomic_load_dec_32 intrinsics
// rather than generic atomic nodes, and only 32-bit variants are defined
// here.  Suffixes _g / _s / _gen select the global / shared / generic
// address-space check.
1179 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1180   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1181 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1182   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1183 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1184   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1185 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1186   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1187 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1188   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1189 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1190   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1191
// Atomic inc / dec ("atom[.space].inc.u32" and "atom[.space].dec.u32").
// 32-bit only.  Each operation has global, shared, generic (empty SpaceStr)
// and generic-lowered-to-".global" (_USE_G) variants.
1192 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1193   atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
1194 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1195   atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
1196 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1197   atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
1198 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1199   ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1200 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1201   atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
1202 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1203   atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
1204 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1205   atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
1206 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1207   ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1208
1209 // atom_and
1210
// Address-space-qualified fragments for atomic AND: atomic_load_and_32 /
// atomic_load_and_64 wrapped in the global (_g), shared (_s) and generic
// (_gen) address-space checks.
1211 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1212   (atomic_load_and_32 node:$a, node:$b)>;
1213 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1214   (atomic_load_and_32 node:$a, node:$b)>;
1215 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1216   (atomic_load_and_32 node:$a, node:$b)>;
1217 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1218   (atomic_load_and_64 node:$a, node:$b)>;
1219 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1220   (atomic_load_and_64 node:$a, node:$b)>;
1221 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1222   (atomic_load_and_64 node:$a, node:$b)>;
1223
// Atomic bitwise AND ("atom[.space].and.b32" / ".b64").  Variants: global,
// shared, generic (empty SpaceStr) and generic-lowered-to-".global"
// (_USE_G), for both widths.
1224 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1225   atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
1226 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1227   atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
1228 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1229   atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
1230 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1231   ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1232 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1233   atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
1234 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1235   atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
1236 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1237   atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
1238 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1239   ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1240
1241 // atom_or
1242
// Address-space-qualified fragments for atomic OR: atomic_load_or_32 /
// atomic_load_or_64 wrapped in the global (_g), shared (_s) and generic
// (_gen) address-space checks.
1243 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1244   (atomic_load_or_32 node:$a, node:$b)>;
1245 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1246   (atomic_load_or_32 node:$a, node:$b)>;
1247 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1248   (atomic_load_or_32 node:$a, node:$b)>;
1249 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1250   (atomic_load_or_64 node:$a, node:$b)>;
1251 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1252   (atomic_load_or_64 node:$a, node:$b)>;
1253 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1254   (atomic_load_or_64 node:$a, node:$b)>;
1255
// Atomic bitwise OR ("atom[.space].or.b32" / ".b64").  Same set of variants
// as the other bitwise atomics (global, shared, generic, and
// generic-lowered-to-".global"); here the shared variant is listed last
// within each width.
1256 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1257   atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
1258 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1259   atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
1260 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1261   ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1262 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1263   atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
1264 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1265   atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
1266 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1267   atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
1268 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1269   ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1270 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1271   atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
1272
1273 // atom_xor
1274
// Address-space-qualified fragments for atomic XOR: atomic_load_xor_32 /
// atomic_load_xor_64 wrapped in the global (_g), shared (_s) and generic
// (_gen) address-space checks.
1275 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1276   (atomic_load_xor_32 node:$a, node:$b)>;
1277 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1278   (atomic_load_xor_32 node:$a, node:$b)>;
1279 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1280   (atomic_load_xor_32 node:$a, node:$b)>;
1281 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1282   (atomic_load_xor_64 node:$a, node:$b)>;
1283 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1284   (atomic_load_xor_64 node:$a, node:$b)>;
1285 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1286   (atomic_load_xor_64 node:$a, node:$b)>;
1287
1288 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1289   atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
1290 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1291   atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
1292 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1293   atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
1294 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1295   ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1296 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1297   atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
1298 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1299   atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
1300 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1301   atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
1302 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1303   ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1304
1305 // atom_cas
1306
// Atomic compare-and-swap, 32- and 64-bit.
//
// Fragment defs: the three-operand atomic_cmp_swap_{32,64} node wrapped in the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK classes (declared outside this excerpt;
// presumably address-space checks -- confirm at their definition).
1307 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1308   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1309 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1310   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1311 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1312   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1313 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1314   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1315 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1316   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1317 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1318   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1319
// Instruction instantiations via F_ATOMIC_3 (defined outside this excerpt).
// Compared with the two-operand atomics in this file, no `imm` leaf argument
// is passed here. The *_USE_G entries match the generic fragment but pass
// ".global" as the address-space string.
1320 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1321   atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1322 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1323   atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1324 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1325   atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1326 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1327   ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1328 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1329   atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1330 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1331   atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1332 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1333   atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1334 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1335   ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1336
1337
1338 //-----------------------------------
1339 // Read Special Registers
1340 //-----------------------------------
// Instruction class for a zero-operand intrinsic that yields a register value.
//   OpStr       - assembly text to print (refers to $dst).
//   regclassOut - register class of the single output, $dst.
//   IntOp       - intrinsic matched with no arguments; its result is $dst.
1341 class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
1342       NVPTXInst<(outs regclassOut:$dst), (ins),
1343                OpStr,
1344          [(set regclassOut:$dst, (IntOp))]>;
1345
// One F_SREG instruction per int_nvvm_read_ptx_sreg_* intrinsic. Each prints a
// `mov.u32` into a 32-bit register from the special register named in the
// string: %tid, %ntid, %ctaid and %nctaid, each with .x/.y/.z components.
1346 def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
1347   int_nvvm_read_ptx_sreg_tid_x>;
1348 def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
1349   int_nvvm_read_ptx_sreg_tid_y>;
1350 def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
1351   int_nvvm_read_ptx_sreg_tid_z>;
1352
1353 def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
1354   int_nvvm_read_ptx_sreg_ntid_x>;
1355 def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
1356   int_nvvm_read_ptx_sreg_ntid_y>;
1357 def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
1358   int_nvvm_read_ptx_sreg_ntid_z>;
1359
1360 def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
1361   int_nvvm_read_ptx_sreg_ctaid_x>;
1362 def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
1363   int_nvvm_read_ptx_sreg_ctaid_y>;
1364 def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
1365   int_nvvm_read_ptx_sreg_ctaid_z>;
1366
1367 def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
1368   int_nvvm_read_ptx_sreg_nctaid_x>;
1369 def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
1370   int_nvvm_read_ptx_sreg_nctaid_y>;
1371 def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
1372   int_nvvm_read_ptx_sreg_nctaid_z>;
1373
// The warp-size read prints the symbol WARP_SZ as the source operand rather
// than a %-prefixed special-register name.
1374 def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
1375   int_nvvm_read_ptx_sreg_warpsize>;
1376
1377
1378 //-----------------------------------
1379 // Support for ldu on sm_20 or later
1380 //-----------------------------------
1381
1382 // Scalar
// Scalar `ldu.global` instructions, one def per form of the $src operand:
//   areg   - $src in Int32Regs        areg64 - $src in Int64Regs
//   avar   - $src is an imemAny       ari    - $src is a MEMri
//   ari64  - $src is a MEMri64
// TyStr is appended to "ldu.global." and must supply the type suffix and the
// operand text. regclass is the register class of $result.
// Every def has an empty pattern list, so TableGen generates no selection
// pattern for it (presumably these are selected from C++ -- confirm), and
// every def requires hasLDU.
1383 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1384   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1385                !strconcat("ldu.global.", TyStr),
1386                       []>, Requires<[hasLDU]>;
1387   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1388                !strconcat("ldu.global.", TyStr),
1389                         []>, Requires<[hasLDU]>;
1390  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1391                !strconcat("ldu.global.", TyStr),
1392                       []>, Requires<[hasLDU]>;
1393  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1394                !strconcat("ldu.global.", TyStr),
1395                       []>, Requires<[hasLDU]>;
1396  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1397                !strconcat("ldu.global.", TyStr),
1398                         []>, Requires<[hasLDU]>;
1399 }
1400
// Scalar ldu instantiations. The i8 entry uses Int16Regs for its result, the
// same class as i16. The p32/p64 entries reuse the u32/u64 strings with
// Int32Regs/Int64Regs.
1401 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1402 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1403 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1404 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1405 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1406 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1407 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1408 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1409
1410 // vector
1411
1412 // Elementized vector ldu
// Two-result `ldu.global` instructions: outputs $dst1 and $dst2 in regclass,
// one def per form of $src (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
// TyStr is appended to "ldu.global." and supplies the vector/type suffix and
// operand text. All pattern lists are empty.
// NOTE(review): unlike the scalar LDU_G defs, no Requires<[hasLDU]> is
// attached to these defs -- confirm whether that is intentional.
1413 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1414  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1415                      (ins Int32Regs:$src),
1416                      !strconcat("ldu.global.", TyStr), []>;
1417  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1418                      (ins Int64Regs:$src),
1419                      !strconcat("ldu.global.", TyStr), []>;
1420  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1421                      (ins MEMri:$src),
1422                      !strconcat("ldu.global.", TyStr), []>;
1423  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1424                      (ins MEMri64:$src),
1425                      !strconcat("ldu.global.", TyStr), []>;
1426  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1427                      (ins imemAny:$src),
1428                      !strconcat("ldu.global.", TyStr), []>;
1429 }
1430
// Four-result `ldu.global` instructions: outputs $dst1..$dst4 in regclass, one
// def per form of $src (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
// TyStr is appended to "ldu.global."; all pattern lists are empty and no
// Requires<> predicate is attached.
1431 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1432  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1433                             regclass:$dst4), (ins Int32Regs:$src), 
1434                !strconcat("ldu.global.", TyStr), []>;
1435  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1436                             regclass:$dst4), (ins Int64Regs:$src), 
1437                !strconcat("ldu.global.", TyStr), []>;
1438  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1439                             regclass:$dst4), (ins MEMri:$src), 
1440                !strconcat("ldu.global.", TyStr), []>;
1441  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1442                             regclass:$dst4), (ins MEMri64:$src), 
1443                !strconcat("ldu.global.", TyStr), []>;
1444  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1445                             regclass:$dst4), (ins imemAny:$src), 
1446                !strconcat("ldu.global.", TyStr), []>;
1447 }
1448
// Vector ldu instantiations. The destination list is written as
// "{{$dst1, ...}}" in the asm string. 8-bit element vectors use Int16Regs for
// their results, the same class as the 16-bit ones. v2 covers
// u8/u16/u32/f32/u64/f64; v4 covers u8/u16/u32/f32 only.
1449 defm INT_PTX_LDU_G_v2i8_ELE
1450   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1451 defm INT_PTX_LDU_G_v2i16_ELE
1452   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1453 defm INT_PTX_LDU_G_v2i32_ELE
1454   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1455 defm INT_PTX_LDU_G_v2f32_ELE
1456   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1457 defm INT_PTX_LDU_G_v2i64_ELE
1458   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1459 defm INT_PTX_LDU_G_v2f64_ELE
1460   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1461 defm INT_PTX_LDU_G_v4i8_ELE
1462   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1463 defm INT_PTX_LDU_G_v4i16_ELE
1464   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1465     Int16Regs>;
1466 defm INT_PTX_LDU_G_v4i32_ELE
1467   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1468     Int32Regs>;
1469 defm INT_PTX_LDU_G_v4f32_ELE
1470   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1471     Float32Regs>;
1472
1473
1474 //-----------------------------------
1475 // Support for ldg on sm_35 or later 
1476 //-----------------------------------
1477
// Scalar `ld.global.nc` instructions, one def per form of the $src operand:
//   areg   - $src in Int32Regs        areg64 - $src in Int64Regs
//   avar   - $src is an imemAny       ari    - $src is a MEMri
//   ari64  - $src is a MEMri64
// TyStr is appended to "ld.global.nc." and must supply the type suffix and
// the operand text. regclass is the register class of $result.
// Every def has an empty pattern list (no TableGen-generated selection
// pattern) and requires hasLDG.
1478 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1479   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1480                !strconcat("ld.global.nc.", TyStr),
1481                       []>, Requires<[hasLDG]>;
1482   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1483                !strconcat("ld.global.nc.", TyStr),
1484                         []>, Requires<[hasLDG]>;
1485  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1486                !strconcat("ld.global.nc.", TyStr),
1487                       []>, Requires<[hasLDG]>;
1488  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1489                !strconcat("ld.global.nc.", TyStr),
1490                       []>, Requires<[hasLDG]>;
1491  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1492                !strconcat("ld.global.nc.", TyStr),
1493                         []>, Requires<[hasLDG]>;
1494 }
1495
// Scalar ldg instantiations. The i8 entry uses Int16Regs for its result, the
// same class as i16. The p32/p64 entries reuse the u32/u64 strings with
// Int32Regs/Int64Regs.
1496 defm INT_PTX_LDG_GLOBAL_i8
1497   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1498 defm INT_PTX_LDG_GLOBAL_i16
1499   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1500 defm INT_PTX_LDG_GLOBAL_i32
1501   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1502 defm INT_PTX_LDG_GLOBAL_i64
1503   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1504 defm INT_PTX_LDG_GLOBAL_f32
1505   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1506 defm INT_PTX_LDG_GLOBAL_f64
1507   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1508 defm INT_PTX_LDG_GLOBAL_p32
1509   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1510 defm INT_PTX_LDG_GLOBAL_p64
1511   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1512
1513 // vector
1514
1515 // Elementized vector ldg 
// Two-result `ld.global.nc` instructions: outputs $dst1 and $dst2 in regclass,
// one def per form of $src (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
// TyStr is appended to "ld.global.nc."; all pattern lists are empty.
// NOTE(review): unlike the scalar LDG_G defs, no Requires<[hasLDG]> is
// attached to these defs -- confirm whether that is intentional.
1516 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1517  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1518                      (ins Int32Regs:$src),
1519                      !strconcat("ld.global.nc.", TyStr), []>;
1520  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1521                      (ins Int64Regs:$src),
1522                      !strconcat("ld.global.nc.", TyStr), []>;
1523  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1524                      (ins MEMri:$src),
1525                      !strconcat("ld.global.nc.", TyStr), []>;
1526  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1527                      (ins MEMri64:$src),
1528                      !strconcat("ld.global.nc.", TyStr), []>;
1529  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1530                      (ins imemAny:$src),
1531                      !strconcat("ld.global.nc.", TyStr), []>;
1532 }
1533
// Four-result `ld.global.nc` instructions: outputs $dst1..$dst4 in regclass,
// one def per form of $src (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny).
// TyStr is appended to "ld.global.nc."; all pattern lists are empty and no
// Requires<> predicate is attached.
1534 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1535   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1536                               regclass:$dst4), (ins Int32Regs:$src), 
1537                !strconcat("ld.global.nc.", TyStr), []>;
1538   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1539                                regclass:$dst4), (ins Int64Regs:$src), 
1540                !strconcat("ld.global.nc.", TyStr), []>;
1541   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1542                               regclass:$dst4), (ins MEMri:$src), 
1543                !strconcat("ld.global.nc.", TyStr), []>;
1544   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1545                               regclass:$dst4), (ins MEMri64:$src), 
1546                !strconcat("ld.global.nc.", TyStr), []>;
1547   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1548                              regclass:$dst4), (ins imemAny:$src), 
1549                !strconcat("ld.global.nc.", TyStr), []>;
1550 }
1551
1552 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. 8-bit element vectors use Int16Regs for their
// results, the same class as the 16-bit ones. v2 covers
// u8/u16/u32/f32/u64/f64; v4 covers u8/u16/u32/f32 only.
1553 defm INT_PTX_LDG_G_v2i8_ELE
1554   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1555 defm INT_PTX_LDG_G_v2i16_ELE
1556   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1557 defm INT_PTX_LDG_G_v2i32_ELE
1558   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1559 defm INT_PTX_LDG_G_v2f32_ELE
1560   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1561 defm INT_PTX_LDG_G_v2i64_ELE
1562   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1563 defm INT_PTX_LDG_G_v2f64_ELE
1564   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1565 defm INT_PTX_LDG_G_v4i8_ELE
1566   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1567 defm INT_PTX_LDG_G_v4i16_ELE
1568   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1569 defm INT_PTX_LDG_G_v4i32_ELE
1570   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1571 defm INT_PTX_LDG_G_v4f32_ELE
1572   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1573
1574
// Instructions for an intrinsic `Intrin` that takes and returns a pointer-sized
// register value; the instantiations in this file use the
// int_nvvm_ptr_*_to_gen intrinsics.
//   Str    - address-space name inserted into the mnemonic: "cvta.<Str>.u32"
//            or "cvta.<Str>.u64".
//   Intrin - intrinsic matched by every def below.
// Defs:
//   _yes / _yes_64 - print cvta; require hasGenericLdSt.
//   _no  / _no_64  - print a plain mov.u32 / mov.u64; no predicate attached.
// Both groups match the same pattern, so which one is chosen depends on the
// predicate (presumably _no is the fallback when hasGenericLdSt is false --
// confirm against pattern ordering).
1575 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1576    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1577           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1578       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1579    Requires<[hasGenericLdSt]>;
1580    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1581           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1582       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1583    Requires<[hasGenericLdSt]>;
1584
1585 // @TODO: Are these actually needed?  I believe global addresses will be copied
1586 // to register values anyway.
1587    /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1588           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1589       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1590       Requires<[hasGenericLdSt]>;
1591    def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1592           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1593       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1594       Requires<[hasGenericLdSt]>;*/
1595
1596    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1597           "mov.u32 \t$result, $src;",
1598       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1599    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1600           "mov.u64 \t$result, $src;",
1601       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1602
1603 // @TODO: Are these actually needed?  I believe global addresses will be copied
1604 // to register values anyway.
1605    /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1606           "mov.u32 \t$result, $src;",
1607       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1608    def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1609           "mov.u64 \t$result, $src;",
1610       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1611 }
1612
// Counterpart of NG_TO_G using the "cvta.to.<Str>" mnemonic; the
// instantiations in this file use the int_nvvm_ptr_gen_to_* intrinsics.
//   Str    - address-space name inserted into the mnemonic.
//   Intrin - intrinsic matched by every def below.
// _yes / _yes_64 print cvta.to.<Str>.u32/.u64 and require hasGenericLdSt;
// _no / _no_64 print a plain mov.u32 / mov.u64 with no predicate attached.
1613 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1614    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1615           !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
1616       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1617    Requires<[hasGenericLdSt]>;
1618    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1619           !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
1620       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1621    Requires<[hasGenericLdSt]>;
1622    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1623           "mov.u32 \t$result, $src;",
1624       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1625    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1626           "mov.u64 \t$result, $src;",
1627       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1628 }
1629
// Address-space conversion instantiations for local, shared, global and const:
// cvta_<space> for <space>-to-generic (NG_TO_G) and cvta_to_<space> for
// generic-to-<space> (G_TO_NG). The constant-space intrinsics are spelled
// "constant" while the mnemonic string is "const".
1630 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1631 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1632 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1633 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1634
1635 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1636 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1637 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1638 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1639
1640
1641 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is printed as a plain register move (mov.u32 for
// the 32-bit form, mov.u64 for the 64-bit form); no cvta is emitted and no
// predicate is attached.
1642 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1643   (ins Int32Regs:$src),
1644                         "mov.u32 \t$result, $src;",
1645                               [(set Int32Regs:$result,
1646                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1647 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1648   (ins Int64Regs:$src),
1649                         "mov.u64 \t$result, $src;",
1650                               [(set Int64Regs:$result,
1651                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1652
1653
1654 // nvvm.move intrinsics
// One register-to-register `mov` per int_nvvm_move_* intrinsic. The integer
// forms print mov.b16/.b32/.b64, the floating-point forms mov.f32/.f64, and
// the two int_nvvm_move_ptr forms mov.u32/.u64 (selected by whether the
// operand is in Int32Regs or Int64Regs).
1655 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1656                              "mov.b16 \t$r, $s;",
1657                              [(set Int16Regs:$r,
1658                                (int_nvvm_move_i16 Int16Regs:$s))]>;
1659 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1660                              "mov.b32 \t$r, $s;",
1661                              [(set Int32Regs:$r,
1662                                (int_nvvm_move_i32 Int32Regs:$s))]>;
1663 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1664                              "mov.b64 \t$r, $s;",
1665                              [(set Int64Regs:$r,
1666                                (int_nvvm_move_i64 Int64Regs:$s))]>;
1667 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1668                              "mov.f32 \t$r, $s;",
1669                              [(set Float32Regs:$r,
1670                                (int_nvvm_move_float Float32Regs:$s))]>;
1671 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1672                              "mov.f64 \t$r, $s;",
1673                              [(set Float64Regs:$r,
1674                                (int_nvvm_move_double Float64Regs:$s))]>;
1675 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1676                              "mov.u32 \t$r, $s;",
1677                              [(set Int32Regs:$r,
1678                                (int_nvvm_move_ptr Int32Regs:$s))]>;
1679 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1680                              "mov.u64 \t$r, $s;",
1681                              [(set Int64Regs:$r,
1682                                (int_nvvm_move_ptr Int64Regs:$s))]>;
1683
1684 // @TODO: Are these actually needed, or will we always just see symbols
1685 // copied to registers first?
1686 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1687                              "mov.u32 \t$r, $s;",
1688                              [(set Int32Regs:$r,
1689                              (int_nvvm_move_ptr texternalsym:$s))]>;
1690 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1691                              "mov.u64 \t$r, $s;",
1692                              [(set Int64Regs:$r,
1693                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
1694
1695
1696 // MoveParam        %r1, param
1697 // ptr_local_to_gen %r2, %r1
1698 // ptr_gen_to_local %r3, %r2
1699 // ->
1700 // mov %r1, param
1701
1702 // @TODO: Revisit this.  There is a type
1703 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1704 // instructions are not currently defined. However, we can use the ptr
1705 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move of
// the symbol, for 64-bit and 32-bit results respectively. The output
// instruction receives the texternalsym directly as its operand.
1706 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1707                 (MoveParam texternalsym:$src)))),
1708                (nvvm_move_ptr64  texternalsym:$src)>;
1709 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1710                 (MoveParam texternalsym:$src)))),
1711                (nvvm_move_ptr32  texternalsym:$src)>;
1712
// Moves an `imem` operand into a 64-bit register with mov.u64. No selection
// pattern is attached. NOTE(review): the name suggests this materializes
// texture/surface handles -- confirm at the use sites.
1713 def texsurf_handles
1714   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1715               "mov.u64 \t$result, $src;", []>;
1716
1717 //-----------------------------------
1718 // Compiler Error Warn
1719 // - Just ignore them in codegen
1720 //-----------------------------------
1721
// int_nvvm_compiler_warn / int_nvvm_compiler_error produce no result; the asm
// string of each def is only a `//` comment naming the intrinsic. The _32 and
// _64 forms differ in the register class of the single operand $a.
1722 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1723                 "// llvm.nvvm.compiler.warn()",
1724                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1725 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1726                 "// llvm.nvvm.compiler.warn()",
1727                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1728 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1729                 "// llvm.nvvm.compiler.error()",
1730                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1731 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1732                 "// llvm.nvvm.compiler.error()",
1733                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
1734
1735
1736 // isspacep
1737
// `isspacep.<space>` for const, global, local and shared. Each def takes an
// address in a 32-bit or 64-bit register ($a) and writes a predicate result
// ($d, Int1Regs), matched from the corresponding int_nvvm_isspacep_*
// intrinsic. Only the const forms carry a predicate (hasPTX31); the others are
// unconditional.
1738 def ISSPACEP_CONST_32
1739   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1740               "isspacep.const \t$d, $a;",
1741               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1742     Requires<[hasPTX31]>;
1743 def ISSPACEP_CONST_64
1744   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1745               "isspacep.const \t$d, $a;",
1746               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1747     Requires<[hasPTX31]>;
1748 def ISSPACEP_GLOBAL_32
1749   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1750               "isspacep.global \t$d, $a;",
1751               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1752 def ISSPACEP_GLOBAL_64
1753   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1754               "isspacep.global \t$d, $a;",
1755               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1756 def ISSPACEP_LOCAL_32
1757   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1758               "isspacep.local \t$d, $a;",
1759               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1760 def ISSPACEP_LOCAL_64
1761   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1762               "isspacep.local \t$d, $a;",
1763               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1764 def ISSPACEP_SHARED_32
1765   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1766               "isspacep.shared \t$d, $a;",
1767               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1768 def ISSPACEP_SHARED_64
1769   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1770               "isspacep.shared \t$d, $a;",
1771               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1772
1773
1774 // Special register reads
// Generic `mov.b32` from a SpecialRegs operand into a 32-bit register. The
// instruction itself has no pattern; the Pat defs below map each
// int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) to MOV_SPECIAL applied
// to the matching ENVREG<N> register.
1775 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
1776                             (ins SpecialRegs:$r),
1777                             "mov.b32\t$d, $r;", []>;
1778
1779 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
1780 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1781 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1782 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1783 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1784 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1785 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1786 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1787 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1788 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1789 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1790 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
1791 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
1792 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
1793 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
1794 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
1795 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
1796 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
1797 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
1798 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
1799 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
1800 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
1801 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
1802 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
1803 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
1804 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
1805 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
1806 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
1807 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
1808 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
1809 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
1810 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
1811
1812
1813 // rotate builtin support
1814
// 32-bit rotate (int_nvvm_rotate_b32).
//
// With hasHWROT32: a single `shf.l.wrap.b32` whose two data operands are both
// $src; the amount is either an i32 immediate (_IMM) or a register (_REG).
//
// With noHWROT32: the same intrinsic is selected to ROT32imm_sw / ROTL32reg_sw,
// which are defined outside this excerpt. The immediate form additionally
// passes the amount through the SUB_FRM_32 transform (also defined elsewhere;
// presumably 32 - amt -- confirm).
1815 def ROTATE_B32_HW_IMM
1816   : NVPTXInst<(outs Int32Regs:$dst),
1817               (ins  Int32Regs:$src, i32imm:$amt),
1818               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1819               [(set Int32Regs:$dst,
1820                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
1821               Requires<[hasHWROT32]> ;
1822
1823 def ROTATE_B32_HW_REG
1824   : NVPTXInst<(outs Int32Regs:$dst),
1825               (ins  Int32Regs:$src, Int32Regs:$amt),
1826               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1827               [(set Int32Regs:$dst,
1828                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
1829               Requires<[hasHWROT32]> ;
1830
1831 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
1832           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1833       Requires<[noHWROT32]> ;
1834
1835 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
1836           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
1837       Requires<[noHWROT32]> ;
1838
// Helpers for splitting a 64-bit register into two 32-bit registers and for
// joining two 32-bit registers back into one. None of them has a selection
// pattern; they are only used as outputs of Pat defs.
//
// GET_LO_INT64 / GET_HI_INT64 print a braced block that declares a scratch
// register %dummy and then unpacks $src with `mov.b64` into a two-element
// list: $dst is the first element for GET_LO and the second for GET_HI, with
// %dummy taking the other slot.
1839 def GET_LO_INT64
1840   : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1841               !strconcat("{{\n\t",
1842               !strconcat(".reg .b32 %dummy;\n\t",
1843               !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
1844         !strconcat("}}", "")))),
1845         []> ;
1846
1847 def GET_HI_INT64
1848   : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1849               !strconcat("{{\n\t",
1850               !strconcat(".reg .b32 %dummy;\n\t",
1851               !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
1852         !strconcat("}}", "")))),
1853         []> ;
1854
// PACK_TWO_INT32 prints `mov.b64 $dst, {$lo, $hi}` (first operand is $lo).
1855 def PACK_TWO_INT32
1856   : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
1857               "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
1858
// int_nvvm_swap_lo_hi_b64: repack with the extracted high half in the $lo
// position and the extracted low half in the $hi position.
1859 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
1860           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
1861                           (GET_LO_INT64 Int64Regs:$src))> ;
1862
1863 // funnel shift, requires >= sm_32
// Pattern-less `shf.l.wrap.b32` / `shf.r.wrap.b32` instructions with operands
// ($lo, $hi, $amt); $amt is an i32 immediate in the _IMM forms and a register
// in the _REG forms. All four require hasHWROT32. They are only used as
// building blocks in the outputs of Pat defs.
1864 def SHF_L_WRAP_B32_IMM
1865   : NVPTXInst<(outs Int32Regs:$dst),
1866               (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
1867               "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1868     Requires<[hasHWROT32]>;
1869
1870 def SHF_L_WRAP_B32_REG
1871   : NVPTXInst<(outs Int32Regs:$dst),
1872               (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1873               "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1874     Requires<[hasHWROT32]>;
1875
1876 def SHF_R_WRAP_B32_IMM
1877   : NVPTXInst<(outs Int32Regs:$dst),
1878               (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
1879               "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1880     Requires<[hasHWROT32]>;
1881
1882 def SHF_R_WRAP_B32_REG
1883   : NVPTXInst<(outs Int32Regs:$dst),
1884               (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1885               "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1886     Requires<[hasHWROT32]>;
1887
1888 // HW version of rotate 64
// 64-bit rotate by an immediate when hasHWROT32 holds: splits $src with
// GET_HI_INT64/GET_LO_INT64, runs SHF_L_WRAP_B32_IMM on the halves in both
// orders ((hi, lo) for the first packed word, (lo, hi) for the second) and
// repacks the two results with PACK_TWO_INT32.
// NOTE(review): PTX shf.*.wrap appears to use only the amount modulo 32, so
// this looks correct only for amounts below 32 — confirm against the PTX ISA.
1889 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
1890           (PACK_TWO_INT32
1891             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1892                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
1893             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1894                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
1895       Requires<[hasHWROT32]>;
1896
// Register-amount form of the hardware 64-bit rotate: same split / funnel
// shift / repack structure as the immediate pattern, using
// SHF_L_WRAP_B32_REG with the Int32Regs amount $amt.
// NOTE(review): PTX shf.*.wrap appears to use only the amount modulo 32, so
// amounts of 32 or more may not rotate as expected — confirm.
1897 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
1898           (PACK_TWO_INT32
1899             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1900                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
1901             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1902                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1903       Requires<[hasHWROT32]>;
1904
1905
// 64-bit rotate-right by an immediate when hasHWROT32 holds: runs
// SHF_R_WRAP_B32_IMM on the halves as (lo, hi) for the first packed word and
// (hi, lo) for the second, then repacks with PACK_TWO_INT32.
// NOTE(review): PTX shf.*.wrap appears to use only the amount modulo 32, so
// this looks correct only for amounts below 32 — confirm against the PTX ISA.
1906 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
1907           (PACK_TWO_INT32
1908             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1909                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
1910             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1911                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
1912       Requires<[hasHWROT32]>;
1913
// Register-amount form of the hardware 64-bit rotate-right: same structure
// as the immediate pattern, using SHF_R_WRAP_B32_REG with the Int32Regs
// amount $amt.
// NOTE(review): PTX shf.*.wrap appears to use only the amount modulo 32, so
// amounts of 32 or more may not rotate as expected — confirm.
1914 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
1915           (PACK_TWO_INT32
1916             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1917                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
1918             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1919                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1920       Requires<[hasHWROT32]>;
1921
1922 // SW version of rotate 64
// Software 64-bit rotate by an immediate for targets where noHWROT32 holds.
// ROT64imm_sw takes the rotate amount plus a second, complementary amount.
// For a 64-bit value the complement has to be taken from 64, so this uses
// SUB_FRM_64 (the same transform the rotate_right pattern applies) instead
// of the 32-bit SUB_FRM_32 that was here before.
// NOTE(review): ROT64imm_sw and SUB_FRM_64 are defined outside this chunk;
// presumably SUB_FRM_64 yields 64 - amt — confirm.
1923 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
1924           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
1925       Requires<[noHWROT32]>;
// Software 64-bit rotate by a register amount: selects ROTL64reg_sw when
// the noHWROT32 predicate holds.
1926 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
1927           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
1928       Requires<[noHWROT32]>;
// Software 64-bit rotate-right by an immediate (noHWROT32): reuses
// ROT64imm_sw with the two amount operands swapped relative to the
// rotate_b64 pattern — (SUB_FRM_64 node:$amt) first, imm:$amt second.
1929 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
1930           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
1931       Requires<[noHWROT32]>;
// Software 64-bit rotate-right by a register amount: selects ROTR64reg_sw
// when the noHWROT32 predicate holds.
1932 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
1933           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
1934       Requires<[noHWROT32]>;
1935
1936
1937 //-----------------------------------
1938 // Texture Intrinsics
1939 //-----------------------------------
1940
1941 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
1942 // also be defined in NVPTXReplaceImageHandles.cpp
1943
1944 // texmode_independent
1945 let IsTex = 1, IsTexModeUnified = 0 in {
1946 // Texture fetch instructions using handles
// tex.1d.v4.f32.s32: four Float32Regs results ($r,$g,$b,$a) fetched via the
// Int64Regs handle operands $t and $s at Int32Regs coordinate $x. No pattern.
1947 def TEX_1D_F32_S32
1948   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1949                     Float32Regs:$b, Float32Regs:$a),
1950               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1951               "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1952               []>;
// tex.1d.v4.f32.f32: four Float32Regs results fetched via handles $t/$s at
// Float32Regs coordinate $x. No pattern ([]).
1953 def TEX_1D_F32_F32
1954   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1955                     Float32Regs:$b, Float32Regs:$a),
1956               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
1957               "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1958               []>;
// tex.level.1d.v4.f32.f32: like TEX_1D_F32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
1959 def TEX_1D_F32_F32_LEVEL
1960   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1961                     Float32Regs:$b, Float32Regs:$a),
1962               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
1963               "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1964               "[$t, $s, \\{$x\\}], $lod;",
1965               []>;
// tex.grad.1d.v4.f32.f32: like TEX_1D_F32_F32 with two extra Float32Regs
// operands, $gradx and $grady, each emitted as a one-element vector.
1966 def TEX_1D_F32_F32_GRAD
1967   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1968                     Float32Regs:$b, Float32Regs:$a),
1969               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1970                    Float32Regs:$gradx, Float32Regs:$grady),
1971               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1972               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
1973               []>;
// tex.1d.v4.s32.s32: four Int32Regs results ($r,$g,$b,$a) fetched via
// handles $t/$s at Int32Regs coordinate $x. No pattern ([]).
1974 def TEX_1D_S32_S32
1975   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1976                     Int32Regs:$b, Int32Regs:$a),
1977               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1978               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1979               []>;
// tex.1d.v4.s32.f32: four Int32Regs results fetched via handles $t/$s at
// Float32Regs coordinate $x. No pattern ([]).
1980 def TEX_1D_S32_F32
1981   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1982                     Int32Regs:$b, Int32Regs:$a),
1983               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
1984               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1985               []>;
// tex.level.1d.v4.s32.f32: like TEX_1D_S32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
1986 def TEX_1D_S32_F32_LEVEL
1987   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1988                     Int32Regs:$b, Int32Regs:$a),
1989               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1990                    Float32Regs:$lod),
1991               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
1992               "[$t, $s, \\{$x\\}], $lod;",
1993               []>;
// tex.grad.1d.v4.s32.f32: like TEX_1D_S32_F32 with two extra Float32Regs
// operands, $gradx and $grady, each emitted as a one-element vector.
1994 def TEX_1D_S32_F32_GRAD
1995   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1996                     Int32Regs:$b, Int32Regs:$a),
1997               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1998                    Float32Regs:$gradx, Float32Regs:$grady),
1999               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2000               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2001               []>;
// tex.1d.v4.u32.s32: same operands and Int32Regs result registers as
// TEX_1D_S32_S32; only the .u32 result type in the mnemonic differs.
2002 def TEX_1D_U32_S32
2003   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2004                     Int32Regs:$b, Int32Regs:$a),
2005               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2006               "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2007               []>;
// tex.1d.v4.u32.f32: same operands and Int32Regs result registers as
// TEX_1D_S32_F32; only the .u32 result type in the mnemonic differs.
2008 def TEX_1D_U32_F32
2009   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2010                     Int32Regs:$b, Int32Regs:$a),
2011               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2012               "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2013               []>;
// tex.level.1d.v4.u32.f32: like TEX_1D_U32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2014 def TEX_1D_U32_F32_LEVEL
2015   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2016                     Int32Regs:$b, Int32Regs:$a),
2017               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2018                    Float32Regs:$lod),
2019               "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2020               "[$t, $s, \\{$x\\}], $lod;",
2021               []>;
// tex.grad.1d.v4.u32.f32: like TEX_1D_U32_F32 with two extra Float32Regs
// operands, $gradx and $grady, each emitted as a one-element vector.
2022 def TEX_1D_U32_F32_GRAD
2023   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2024                     Int32Regs:$b, Int32Regs:$a),
2025               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2026                    Float32Regs:$gradx, Float32Regs:$grady),
2027               "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2028               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2029               []>;
2030
// tex.a1d.v4.f32.s32: Float32Regs results; the address vector is {$l, $x},
// with Int32Regs $l first (the array layer, judging by the name — confirm)
// followed by the Int32Regs coordinate $x. No pattern ([]).
2031 def TEX_1D_ARRAY_F32_S32
2032   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2033                     Float32Regs:$b, Float32Regs:$a),
2034               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2035               "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2036               "[$t, $s, \\{$l, $x\\}];",
2037               []>;
// tex.a1d.v4.f32.f32: Float32Regs results; address vector {$l, $x} mixes the
// Int32Regs operand $l with the Float32Regs coordinate $x. No pattern ([]).
2038 def TEX_1D_ARRAY_F32_F32
2039   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2040                     Float32Regs:$b, Float32Regs:$a),
2041               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2042               "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2043               "[$t, $s, \\{$l, $x\\}];",
2044               []>;
// tex.level.a1d.v4.f32.f32: like TEX_1D_ARRAY_F32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2045 def TEX_1D_ARRAY_F32_F32_LEVEL
2046   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2047                     Float32Regs:$b, Float32Regs:$a),
2048               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2049                    Float32Regs:$lod),
2050               "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2051               "[$t, $s, \\{$l, $x\\}], $lod;",
2052               []>;
// tex.grad.a1d.v4.f32.f32: like TEX_1D_ARRAY_F32_F32 with two extra
// Float32Regs operands, $gradx and $grady, each a one-element vector.
2053 def TEX_1D_ARRAY_F32_F32_GRAD
2054   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2055                     Float32Regs:$b, Float32Regs:$a),
2056               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2057                    Float32Regs:$gradx, Float32Regs:$grady),
2058               "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2059               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2060               []>;
// tex.a1d.v4.s32.s32: Int32Regs results; address vector {$l, $x} with both
// elements in Int32Regs. No pattern ([]).
2061 def TEX_1D_ARRAY_S32_S32
2062   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2063                     Int32Regs:$b, Int32Regs:$a),
2064               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2065               "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2066               "[$t, $s, \\{$l, $x\\}];",
2067               []>;
// tex.a1d.v4.s32.f32: Int32Regs results; address vector {$l, $x} with
// Int32Regs $l and Float32Regs coordinate $x. No pattern ([]).
2068 def TEX_1D_ARRAY_S32_F32
2069   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2070                     Int32Regs:$b, Int32Regs:$a),
2071               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2072               "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2073               "[$t, $s, \\{$l, $x\\}];",
2074               []>;
// tex.level.a1d.v4.s32.f32: like TEX_1D_ARRAY_S32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2075 def TEX_1D_ARRAY_S32_F32_LEVEL
2076   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2077                     Int32Regs:$b, Int32Regs:$a),
2078               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2079                    Float32Regs:$lod),
2080               "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2081               "[$t, $s, \\{$l, $x\\}], $lod;",
2082               []>;
// tex.grad.a1d.v4.s32.f32: like TEX_1D_ARRAY_S32_F32 with two extra
// Float32Regs operands, $gradx and $grady, each a one-element vector.
2083 def TEX_1D_ARRAY_S32_F32_GRAD
2084   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2085                     Int32Regs:$b, Int32Regs:$a),
2086               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2087                    Float32Regs:$gradx, Float32Regs:$grady),
2088               "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2089               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2090               []>;
// tex.a1d.v4.u32.s32: same operands and Int32Regs results as
// TEX_1D_ARRAY_S32_S32; only the .u32 result type in the mnemonic differs.
2091 def TEX_1D_ARRAY_U32_S32
2092   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2093                     Int32Regs:$b, Int32Regs:$a),
2094               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2095               "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2096               "[$t, $s, \\{$l, $x\\}];",
2097               []>;
// tex.a1d.v4.u32.f32: same operands and Int32Regs results as
// TEX_1D_ARRAY_S32_F32; only the .u32 result type in the mnemonic differs.
2098 def TEX_1D_ARRAY_U32_F32
2099   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2100                     Int32Regs:$b, Int32Regs:$a),
2101               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2102               "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2103               "[$t, $s, \\{$l, $x\\}];",
2104               []>;
// tex.level.a1d.v4.u32.f32: like TEX_1D_ARRAY_U32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2105 def TEX_1D_ARRAY_U32_F32_LEVEL
2106   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2107                     Int32Regs:$b, Int32Regs:$a),
2108               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2109                    Float32Regs:$lod),
2110               "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2111               "[$t, $s, \\{$l, $x\\}], $lod;",
2112               []>;
// tex.grad.a1d.v4.u32.f32: like TEX_1D_ARRAY_U32_F32 with two extra
// Float32Regs operands, $gradx and $grady, each a one-element vector.
2113 def TEX_1D_ARRAY_U32_F32_GRAD
2114   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2115                     Int32Regs:$b, Int32Regs:$a),
2116               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2117                    Float32Regs:$gradx, Float32Regs:$grady),
2118               "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2119               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2120               []>;
2121
// tex.2d.v4.f32.s32: Float32Regs results fetched via handles $t/$s at the
// two-element Int32Regs coordinate {$x, $y}. No pattern ([]).
2122 def TEX_2D_F32_S32
2123   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2124                     Float32Regs:$b, Float32Regs:$a),
2125               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2126               "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2127               "[$t, $s, \\{$x, $y\\}];",
2128               []>;
// tex.2d.v4.f32.f32: Float32Regs results fetched via handles $t/$s at the
// two-element Float32Regs coordinate {$x, $y}. No pattern ([]).
2129 def TEX_2D_F32_F32
2130   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2131                     Float32Regs:$b, Float32Regs:$a),
2132               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2133               "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2134               "[$t, $s, \\{$x, $y\\}];",
2135               []>;
// tex.level.2d.v4.f32.f32: like TEX_2D_F32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2136 def TEX_2D_F32_F32_LEVEL
2137   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2138                     Float32Regs:$b, Float32Regs:$a),
2139               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2140                    Float32Regs:$lod),
2141               "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2142               "[$t, $s, \\{$x, $y\\}], $lod;",
2143               []>;
// tex.grad.2d.v4.f32.f32: like TEX_2D_F32_F32 with two extra two-element
// Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2144 def TEX_2D_F32_F32_GRAD
2145   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2146                     Float32Regs:$b, Float32Regs:$a),
2147               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2148                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2149                    Float32Regs:$grady0, Float32Regs:$grady1),
2150               "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2151               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2152               "\\{$grady0, $grady1\\};",
2153               []>;
// tex.2d.v4.s32.s32: Int32Regs results fetched via handles $t/$s at the
// two-element Int32Regs coordinate {$x, $y}. No pattern ([]).
2154 def TEX_2D_S32_S32
2155   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2156                     Int32Regs:$b, Int32Regs:$a),
2157               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2158               "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2159               "[$t, $s, \\{$x, $y\\}];",
2160               []>;
// tex.2d.v4.s32.f32: Int32Regs results fetched via handles $t/$s at the
// two-element Float32Regs coordinate {$x, $y}. No pattern ([]).
2161 def TEX_2D_S32_F32
2162   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2163                     Int32Regs:$b, Int32Regs:$a),
2164               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2165               "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2166               "[$t, $s, \\{$x, $y\\}];",
2167               []>;
// tex.level.2d.v4.s32.f32: like TEX_2D_S32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2168 def TEX_2D_S32_F32_LEVEL
2169   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2170                     Int32Regs:$b, Int32Regs:$a),
2171               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2172                    Float32Regs:$lod),
2173               "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2174               "[$t, $s, \\{$x, $y\\}], $lod;",
2175               []>;
// tex.grad.2d.v4.s32.f32: like TEX_2D_S32_F32 with two extra two-element
// Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2176 def TEX_2D_S32_F32_GRAD
2177   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2178                     Int32Regs:$b, Int32Regs:$a),
2179               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2180                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2181                    Float32Regs:$grady0, Float32Regs:$grady1),
2182               "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2183               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2184               "\\{$grady0, $grady1\\};",
2185               []>;
// tex.2d.v4.u32.s32: same operands and Int32Regs results as TEX_2D_S32_S32;
// only the .u32 result type in the mnemonic differs.
2186 def TEX_2D_U32_S32
2187   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2188                     Int32Regs:$b, Int32Regs:$a),
2189               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2190               "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2191               "[$t, $s, \\{$x, $y\\}];",
2192               []>;
// tex.2d.v4.u32.f32: same operands and Int32Regs results as TEX_2D_S32_F32;
// only the .u32 result type in the mnemonic differs.
2193 def TEX_2D_U32_F32
2194   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2195                     Int32Regs:$b, Int32Regs:$a),
2196               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2197               "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2198               "[$t, $s, \\{$x, $y\\}];",
2199               []>;
// tex.level.2d.v4.u32.f32: like TEX_2D_U32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2200 def TEX_2D_U32_F32_LEVEL
2201   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2202                     Int32Regs:$b, Int32Regs:$a),
2203               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2204                    Float32Regs:$lod),
2205               "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2206               "[$t, $s, \\{$x, $y\\}], $lod;",
2207               []>;
// tex.grad.2d.v4.u32.f32: like TEX_2D_U32_F32 with two extra two-element
// Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2208 def TEX_2D_U32_F32_GRAD
2209   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2210                     Int32Regs:$b, Int32Regs:$a),
2211               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2212                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2213                    Float32Regs:$grady0, Float32Regs:$grady1),
2214               "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2215               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2216               "\\{$grady0, $grady1\\};",
2217               []>;
2218
// tex.a2d.v4.f32.s32: Float32Regs results; the address is the four-element
// vector {$l, $x, $y, $y}. $y is repeated to fill the last slot (presumably
// padding that the instruction ignores — confirm against the PTX ISA).
2219 def TEX_2D_ARRAY_F32_S32
2220   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2221                     Float32Regs:$b, Float32Regs:$a),
2222               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2223                    Int32Regs:$y),
2224               "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2225               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2226               []>;
// tex.a2d.v4.f32.f32: Float32Regs results; address vector {$l, $x, $y, $y}
// with Int32Regs $l, Float32Regs $x/$y, and $y repeated in the last slot.
2227 def TEX_2D_ARRAY_F32_F32
2228   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2229                     Float32Regs:$b, Float32Regs:$a),
2230               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2231                    Float32Regs:$y),
2232               "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2233               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2234               []>;
// tex.level.a2d.v4.f32.f32: like TEX_2D_ARRAY_F32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2235 def TEX_2D_ARRAY_F32_F32_LEVEL
2236   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2237                     Float32Regs:$b, Float32Regs:$a),
2238               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2239                    Float32Regs:$y, Float32Regs:$lod),
2240               "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2241               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2242               []>;
// tex.grad.a2d.v4.f32.f32: like TEX_2D_ARRAY_F32_F32 with two extra
// two-element Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2243 def TEX_2D_ARRAY_F32_F32_GRAD
2244   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2245                     Float32Regs:$b, Float32Regs:$a),
2246               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2247                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2248                    Float32Regs:$grady0, Float32Regs:$grady1),
2249               "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2250               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2251               "\\{$grady0, $grady1\\};",
2252               []>;
// tex.a2d.v4.s32.s32: Int32Regs results; address vector {$l, $x, $y, $y}
// entirely in Int32Regs, with $y repeated in the last slot. No pattern ([]).
2253 def TEX_2D_ARRAY_S32_S32
2254   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2255                     Int32Regs:$b, Int32Regs:$a),
2256               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2257                    Int32Regs:$y),
2258               "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2259               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2260               []>;
// tex.a2d.v4.s32.f32: Int32Regs results; address vector {$l, $x, $y, $y}
// with Int32Regs $l, Float32Regs $x/$y, and $y repeated in the last slot.
2261 def TEX_2D_ARRAY_S32_F32
2262   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2263                     Int32Regs:$b, Int32Regs:$a),
2264               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2265                    Float32Regs:$y),
2266               "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2267               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2268               []>;
// tex.level.a2d.v4.s32.f32: like TEX_2D_ARRAY_S32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2269 def TEX_2D_ARRAY_S32_F32_LEVEL
2270   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2271                     Int32Regs:$b, Int32Regs:$a),
2272               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2273                    Float32Regs:$y, Float32Regs:$lod),
2274               "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2275               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2276               []>;
// tex.grad.a2d.v4.s32.f32: like TEX_2D_ARRAY_S32_F32 with two extra
// two-element Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2277 def TEX_2D_ARRAY_S32_F32_GRAD
2278   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2279                     Int32Regs:$b, Int32Regs:$a),
2280               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2281                    Float32Regs:$y,
2282                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2283                    Float32Regs:$grady0, Float32Regs:$grady1),
2284               "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2285               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2286               "\\{$grady0, $grady1\\};",
2287               []>;
// tex.a2d.v4.u32.s32: same operands and Int32Regs results as
// TEX_2D_ARRAY_S32_S32; only the .u32 result type in the mnemonic differs.
2288 def TEX_2D_ARRAY_U32_S32
2289   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2290                     Int32Regs:$b, Int32Regs:$a),
2291               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2292                    Int32Regs:$y),
2293               "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2294               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2295               []>;
// tex.a2d.v4.u32.f32: same operands and Int32Regs results as
// TEX_2D_ARRAY_S32_F32; only the .u32 result type in the mnemonic differs.
2296 def TEX_2D_ARRAY_U32_F32
2297   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2298                     Int32Regs:$b, Int32Regs:$a),
2299               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2300                    Float32Regs:$y),
2301               "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2302               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2303               []>;
// tex.level.a2d.v4.u32.f32: like TEX_2D_ARRAY_U32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2304 def TEX_2D_ARRAY_U32_F32_LEVEL
2305   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2306                     Int32Regs:$b, Int32Regs:$a),
2307               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2308                    Float32Regs:$y, Float32Regs:$lod),
2309               "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2310               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2311               []>;
// tex.grad.a2d.v4.u32.f32: like TEX_2D_ARRAY_U32_F32 with two extra
// two-element Float32Regs vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
2312 def TEX_2D_ARRAY_U32_F32_GRAD
2313   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2314                     Int32Regs:$b, Int32Regs:$a),
2315               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2316                    Float32Regs:$y,
2317                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2318                    Float32Regs:$grady0, Float32Regs:$grady1),
2319               "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2320               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2321               "\\{$grady0, $grady1\\};",
2322               []>;
2323
// tex.3d.v4.f32.s32: Float32Regs results; the Int32Regs coordinate is
// emitted as the four-element vector {$x, $y, $z, $z}, with $z repeated in
// the last slot (presumably ignored padding — confirm against the PTX ISA).
2324 def TEX_3D_F32_S32
2325   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2326                     Float32Regs:$b, Float32Regs:$a),
2327               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2328                    Int32Regs:$z),
2329               "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2330               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2331               []>;
// tex.3d.v4.f32.f32: Float32Regs results; Float32Regs coordinate emitted as
// {$x, $y, $z, $z} with $z repeated in the last slot. No pattern ([]).
2332 def TEX_3D_F32_F32
2333   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2334                     Float32Regs:$b, Float32Regs:$a),
2335               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2336                    Float32Regs:$z),
2337               "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2338               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2339               []>;
// tex.level.3d.v4.f32.f32: like TEX_3D_F32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2340 def TEX_3D_F32_F32_LEVEL
2341   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2342                     Float32Regs:$b, Float32Regs:$a),
2343               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2344                    Float32Regs:$z, Float32Regs:$lod),
2345               "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2346               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2347               []>;
// tex.grad.3d.v4.f32.f32: like TEX_3D_F32_F32 with two extra gradient
// vectors built from six Float32Regs operands. Each is emitted with four
// elements, repeating the third ($gradx2 / $grady2) in the last slot.
2348 def TEX_3D_F32_F32_GRAD
2349   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2350                     Float32Regs:$b, Float32Regs:$a),
2351               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2352                    Float32Regs:$z,
2353                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2354                    Float32Regs:$gradx2, Float32Regs:$grady0,
2355                    Float32Regs:$grady1, Float32Regs:$grady2),
2356               "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2357               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2358               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2359               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2360               []>;
// tex.3d.v4.s32.s32: Int32Regs results; Int32Regs coordinate emitted as
// {$x, $y, $z, $z} with $z repeated in the last slot. No pattern ([]).
2361 def TEX_3D_S32_S32
2362   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363                     Int32Regs:$b, Int32Regs:$a),
2364               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2365                    Int32Regs:$z),
2366               "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2367               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2368               []>;
// tex.3d.v4.s32.f32: Int32Regs results; Float32Regs coordinate emitted as
// {$x, $y, $z, $z} with $z repeated in the last slot. No pattern ([]).
2369 def TEX_3D_S32_F32
2370   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2371                     Int32Regs:$b, Int32Regs:$a),
2372               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2373                    Float32Regs:$z),
2374               "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2375               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2376               []>;
// tex.level.3d.v4.s32.f32: like TEX_3D_S32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2377 def TEX_3D_S32_F32_LEVEL
2378   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2379                     Int32Regs:$b, Int32Regs:$a),
2380               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2381                    Float32Regs:$z, Float32Regs:$lod),
2382               "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2383               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2384               []>;
// tex.grad.3d.v4.s32.f32: like TEX_3D_S32_F32 with two extra gradient
// vectors built from six Float32Regs operands. Each is emitted with four
// elements, repeating the third ($gradx2 / $grady2) in the last slot.
2385 def TEX_3D_S32_F32_GRAD
2386   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2387                     Int32Regs:$b, Int32Regs:$a),
2388               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2389                    Float32Regs:$z,
2390                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2391                    Float32Regs:$gradx2, Float32Regs:$grady0,
2392                    Float32Regs:$grady1, Float32Regs:$grady2),
2393               "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2394               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2395               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2396               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2397               []>;
// tex.3d.v4.u32.s32: same operands and Int32Regs results as TEX_3D_S32_S32;
// only the .u32 result type in the mnemonic differs.
2398 def TEX_3D_U32_S32
2399   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2400                     Int32Regs:$b, Int32Regs:$a),
2401               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2402                    Int32Regs:$z),
2403               "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2404               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2405               []>;
// tex.3d.v4.u32.f32: same operands and Int32Regs results as TEX_3D_S32_F32;
// only the .u32 result type in the mnemonic differs.
2406 def TEX_3D_U32_F32
2407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2408                     Int32Regs:$b, Int32Regs:$a),
2409               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2410                    Float32Regs:$z),
2411               "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2412               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2413               []>;
// tex.level.3d.v4.u32.f32: like TEX_3D_U32_F32 with an extra Float32Regs
// operand $lod emitted after the address. No pattern ([]).
2414 def TEX_3D_U32_F32_LEVEL
2415   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2416                     Int32Regs:$b, Int32Regs:$a),
2417               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2418                    Float32Regs:$z, Float32Regs:$lod),
2419               "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2420               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2421               []>;
// tex.grad.3d.v4.u32.f32: like TEX_3D_U32_F32 with two extra gradient
// vectors built from six Float32Regs operands. Each is emitted with four
// elements, repeating the third ($gradx2 / $grady2) in the last slot.
2422 def TEX_3D_U32_F32_GRAD
2423   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2424                     Int32Regs:$b, Int32Regs:$a),
2425               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2426                    Float32Regs:$z,
2427                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2428                    Float32Regs:$gradx2, Float32Regs:$grady0,
2429                    Float32Regs:$grady1, Float32Regs:$grady2),
2430               "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2431               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2432               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2433               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2434               []>;
2435
// tex.cube.v4.f32.f32: Float32Regs results; Float32Regs coordinate emitted
// as {$x, $y, $z, $z} with $z repeated in the last slot. No pattern ([]).
2436 def TEX_CUBE_F32_F32
2437   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2438                     Float32Regs:$b, Float32Regs:$a),
2439               (ins Int64Regs:$t, Int64Regs:$s,
2440                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2441               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2442               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2443               []>;
// tex.level.cube.v4.f32.f32: like TEX_CUBE_F32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2444 def TEX_CUBE_F32_F32_LEVEL
2445   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2446                     Float32Regs:$b, Float32Regs:$a),
2447               (ins Int64Regs:$t, Int64Regs:$s,
2448                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2449                    Float32Regs:$lod),
2450               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2451               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2452               []>;
// tex.cube.v4.s32.f32: Int32Regs results; Float32Regs coordinate emitted as
// {$x, $y, $z, $z} with $z repeated in the last slot. No pattern ([]).
2453 def TEX_CUBE_S32_F32
2454   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2455                     Int32Regs:$b, Int32Regs:$a),
2456               (ins Int64Regs:$t, Int64Regs:$s,
2457                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2458               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2459               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2460               []>;
// tex.level.cube.v4.s32.f32: like TEX_CUBE_S32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2461 def TEX_CUBE_S32_F32_LEVEL
2462   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2463                     Int32Regs:$b, Int32Regs:$a),
2464               (ins Int64Regs:$t, Int64Regs:$s,
2465                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2466                    Float32Regs:$lod),
2467               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2468               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2469               []>;
// tex.cube.v4.u32.f32: same operands and Int32Regs results as
// TEX_CUBE_S32_F32; only the .u32 result type in the mnemonic differs.
2470 def TEX_CUBE_U32_F32
2471   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2472                     Int32Regs:$b, Int32Regs:$a),
2473               (ins Int64Regs:$t, Int64Regs:$s,
2474                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2475               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2476               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2477               []>;
// tex.level.cube.v4.u32.f32: like TEX_CUBE_U32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2478 def TEX_CUBE_U32_F32_LEVEL
2479   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2480                     Int32Regs:$b, Int32Regs:$a),
2481               (ins Int64Regs:$t, Int64Regs:$s,
2482                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2483                    Float32Regs:$lod),
2484               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2485               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2486               []>;
2487
// tex.acube.v4.f32.f32: Float32Regs results; the address vector is
// {$l, $x, $y, $z} — Int32Regs $l first, then the three Float32Regs
// coordinates, with no repeated element. No pattern ([]).
2488 def TEX_CUBE_ARRAY_F32_F32
2489   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2490                     Float32Regs:$b, Float32Regs:$a),
2491               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2492                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2493               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2494               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2495               []>;
// tex.level.acube.v4.f32.f32: like TEX_CUBE_ARRAY_F32_F32 with an extra
// Float32Regs operand $lod emitted after the address. No pattern ([]).
2496 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2497   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2498                     Float32Regs:$b, Float32Regs:$a),
2499               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2500                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2501                    Float32Regs:$lod),
2502               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2503               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2504               []>;
2505 def TEX_CUBE_ARRAY_S32_F32
2506   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2507                     Int32Regs:$b, Int32Regs:$a),
2508               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2509                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2510               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2511               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2512               []>;
2513 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2514   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2515                     Int32Regs:$b, Int32Regs:$a),
2516               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2517                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2518                    Float32Regs:$lod),
2519               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2520               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2521               []>;
2522 def TEX_CUBE_ARRAY_U32_F32
2523   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2524                     Int32Regs:$b, Int32Regs:$a),
2525               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2526                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2527               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2528               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2529               []>;
2530 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2531   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2532                     Int32Regs:$b, Int32Regs:$a),
2533               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2534                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2535                    Float32Regs:$lod),
2536               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2537               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2538               []>;
2539
// tld4 instructions on 2D textures.  Each one writes four values ($v0..$v3)
// and reads the 64-bit $t and $s operands plus two f32 coordinates.  The
// letter after "tld4." in the mnemonic (r, g, b or a) and the result type
// (f32, s32, u32) are the only things that change between records, so the
// operand lists and the operand part of the asm string live in one class.
// NOTE(review): resRC is typed as RegisterClass; this assumes
// Int32Regs/Float32Regs derive from it -- confirm.
class TLD4_2D_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$v0, resRC:$v1, resRC:$v2, resRC:$v3),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y),
              !strconcat(mnemonic,
                         "\t\\{$v0, $v1, $v2, $v3\\}, "
                         "[$t, $s, \\{$x, $y\\}];"),
              []>;

// f32 results.
def TLD4_R_2D_F32_F32 : TLD4_2D_OP<"tld4.r.2d.v4.f32.f32", Float32Regs>;
def TLD4_G_2D_F32_F32 : TLD4_2D_OP<"tld4.g.2d.v4.f32.f32", Float32Regs>;
def TLD4_B_2D_F32_F32 : TLD4_2D_OP<"tld4.b.2d.v4.f32.f32", Float32Regs>;
def TLD4_A_2D_F32_F32 : TLD4_2D_OP<"tld4.a.2d.v4.f32.f32", Float32Regs>;

// s32 results.
def TLD4_R_2D_S32_F32 : TLD4_2D_OP<"tld4.r.2d.v4.s32.f32", Int32Regs>;
def TLD4_G_2D_S32_F32 : TLD4_2D_OP<"tld4.g.2d.v4.s32.f32", Int32Regs>;
def TLD4_B_2D_S32_F32 : TLD4_2D_OP<"tld4.b.2d.v4.s32.f32", Int32Regs>;
def TLD4_A_2D_S32_F32 : TLD4_2D_OP<"tld4.a.2d.v4.s32.f32", Int32Regs>;

// u32 results.
def TLD4_R_2D_U32_F32 : TLD4_2D_OP<"tld4.r.2d.v4.u32.f32", Int32Regs>;
def TLD4_G_2D_U32_F32 : TLD4_2D_OP<"tld4.g.2d.v4.u32.f32", Int32Regs>;
def TLD4_B_2D_U32_F32 : TLD4_2D_OP<"tld4.b.2d.v4.u32.f32", Int32Regs>;
def TLD4_A_2D_U32_F32 : TLD4_2D_OP<"tld4.a.2d.v4.u32.f32", Int32Regs>;
2624 }
2625
2626
2627 // texmode_unified
2628 let IsTex = 1, IsTexModeUnified = 1 in {
2629 // Texture fetch instructions using handles
// Unified-mode 1D texture fetches.  Only a single 64-bit $t operand is
// passed (no separate $s), and the address is the one-element vector {$x}.
// Results are four values in $r, $g, $b, $a.  The classes below hold the
// operand lists and the operand part of the asm string; each def supplies
// the mnemonic and register classes.
// NOTE(review): the register-class parameters are typed as RegisterClass;
// this assumes Int32Regs/Float32Regs derive from it -- confirm.

// Plain fetch; the coordinate may be s32 or f32.
class TEX_UNIFIED_1D_OP<string mnemonic, RegisterClass resRC,
                        RegisterClass coordRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, coordRC:$x),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];"),
              []>;

// Fetch with an f32 $lod operand; coordinate is always f32.
class TEX_UNIFIED_1D_LEVEL_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x\\}], $lod;"),
              []>;

// Fetch with one-element gradient vectors; coordinate is always f32.
class TEX_UNIFIED_1D_GRAD_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
              []>;

// f32 results.
def TEX_UNIFIED_1D_F32_S32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_1D_F32_F32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_1D_F32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL_OP<"tex.level.1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_F32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD_OP<"tex.grad.1d.v4.f32.f32", Float32Regs>;

// s32 results.
def TEX_UNIFIED_1D_S32_S32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_S32_F32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_S32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL_OP<"tex.level.1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_S32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD_OP<"tex.grad.1d.v4.s32.f32", Int32Regs>;

// u32 results.
def TEX_UNIFIED_1D_U32_S32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_U32_F32
  : TEX_UNIFIED_1D_OP<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_U32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL_OP<"tex.level.1d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_1D_U32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD_OP<"tex.grad.1d.v4.u32.f32", Int32Regs>;
2713
// Unified-mode 1D-array texture fetches (tex.a1d).  The address vector is
// {$l, $x}: the 32-bit integer $l comes first, then the coordinate.  Results
// are four values in $r, $g, $b, $a.  The classes below hold the operand
// lists and the operand part of the asm string; each def supplies the
// mnemonic and register classes.
// NOTE(review): the register-class parameters are typed as RegisterClass;
// this assumes Int32Regs/Float32Regs derive from it -- confirm.

// Plain fetch; the coordinate may be s32 or f32.
class TEX_UNIFIED_1D_ARRAY_OP<string mnemonic, RegisterClass resRC,
                              RegisterClass coordRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, coordRC:$x),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x\\}];"),
              []>;

// Fetch with an f32 $lod operand; coordinate is always f32.
class TEX_UNIFIED_1D_ARRAY_LEVEL_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x\\}], $lod;"),
              []>;

// Fetch with one-element gradient vectors; coordinate is always f32.
class TEX_UNIFIED_1D_ARRAY_GRAD_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
              []>;

// f32 results.
def TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_OP<"tex.level.a1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_OP<"tex.grad.a1d.v4.f32.f32", Float32Regs>;

// s32 results.
def TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_OP<"tex.level.a1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_OP<"tex.grad.a1d.v4.s32.f32", Int32Regs>;

// u32 results.
def TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY_OP<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_OP<"tex.level.a1d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_OP<"tex.grad.a1d.v4.u32.f32", Int32Regs>;
2804
// Unified-mode 2D texture fetches (tex.2d).  The address vector is {$x, $y}
// and the results are four values in $r, $g, $b, $a.  The classes below
// hold the operand lists and the operand part of the asm string; each def
// supplies the mnemonic and register classes.
// NOTE(review): the register-class parameters are typed as RegisterClass;
// this assumes Int32Regs/Float32Regs derive from it -- confirm.

// Plain fetch; both coordinates share one register class (s32 or f32).
class TEX_UNIFIED_2D_OP<string mnemonic, RegisterClass resRC,
                        RegisterClass coordRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, coordRC:$x, coordRC:$y),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y\\}];"),
              []>;

// Fetch with an f32 $lod operand; coordinates are always f32.
class TEX_UNIFIED_2D_LEVEL_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y\\}], $lod;"),
              []>;

// Fetch with two-element gradient vectors; coordinates are always f32.
class TEX_UNIFIED_2D_GRAD_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
                         "\\{$grady0, $grady1\\};"),
              []>;

// f32 results.
def TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_OP<"tex.level.2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_OP<"tex.grad.2d.v4.f32.f32", Float32Regs>;

// s32 results.
def TEX_UNIFIED_2D_S32_S32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_OP<"tex.level.2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_OP<"tex.grad.2d.v4.s32.f32", Int32Regs>;

// u32 results.
def TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D_OP<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_U32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_OP<"tex.level.2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_OP<"tex.grad.2d.v4.u32.f32", Int32Regs>;
2901
// Unified-mode 2D-array texture fetches (tex.a2d).  The address is printed
// as the four-element vector {$l, $x, $y, $y}: the 32-bit integer $l, the
// two coordinates, and $y once more to fill the last slot.  Results are four
// values in $r, $g, $b, $a.  The classes below hold the operand lists and
// the operand part of the asm string; each def supplies the mnemonic and
// register classes.
// NOTE(review): the register-class parameters are typed as RegisterClass;
// this assumes Int32Regs/Float32Regs derive from it -- confirm.

// Plain fetch; both coordinates share one register class (s32 or f32).
class TEX_UNIFIED_2D_ARRAY_OP<string mnemonic, RegisterClass resRC,
                              RegisterClass coordRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, coordRC:$x, coordRC:$y),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x, $y, $y\\}];"),
              []>;

// Fetch with an f32 $lod operand; coordinates are always f32.
class TEX_UNIFIED_2D_ARRAY_LEVEL_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x, $y, $y\\}], $lod;"),
              []>;

// Fetch with two-element gradient vectors; coordinates are always f32.
class TEX_UNIFIED_2D_ARRAY_GRAD_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
                         "\\{$grady0, $grady1\\};"),
              []>;

// f32 results.
def TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_OP<"tex.level.a2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_OP<"tex.grad.a2d.v4.f32.f32", Float32Regs>;

// s32 results.
def TEX_UNIFIED_2D_ARRAY_S32_S32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_OP<"tex.level.a2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_OP<"tex.grad.a2d.v4.s32.f32", Int32Regs>;

// u32 results.
def TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY_OP<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_OP<"tex.level.a2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_OP<"tex.grad.a2d.v4.u32.f32", Int32Regs>;
3006
// Unified-mode 3D texture fetches (tex.3d).  The address is printed as the
// four-element vector {$x, $y, $z, $z}, i.e. $z is repeated to fill the last
// slot; the gradient vectors of the GRAD form repeat their third element the
// same way.  Results are four values in $r, $g, $b, $a.  The classes below
// hold the operand lists and the operand part of the asm string; each def
// supplies the mnemonic and register classes.
// NOTE(review): the register-class parameters are typed as RegisterClass;
// this assumes Int32Regs/Float32Regs derive from it -- confirm.

// Plain fetch; all coordinates share one register class (s32 or f32).
class TEX_UNIFIED_3D_OP<string mnemonic, RegisterClass resRC,
                        RegisterClass coordRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, coordRC:$x, coordRC:$y, coordRC:$z),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y, $z, $z\\}];"),
              []>;

// Fetch with an f32 $lod operand; coordinates are always f32.
class TEX_UNIFIED_3D_LEVEL_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
              []>;

// Fetch with three-component gradients; coordinates are always f32.
class TEX_UNIFIED_3D_GRAD_OP<string mnemonic, RegisterClass resRC>
  : NVPTXInst<(outs resRC:$r, resRC:$g, resRC:$b, resRC:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              !strconcat(mnemonic,
                         "\t\\{$r, $g, $b, $a\\}, "
                         "[$t, \\{$x, $y, $z, $z\\}], "
                         "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
                         "\\{$grady0, $grady1, $grady2, $grady2\\};"),
              []>;

// f32 results.
def TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_OP<"tex.level.3d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_OP<"tex.grad.3d.v4.f32.f32", Float32Regs>;

// s32 results.
def TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_S32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_OP<"tex.level.3d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_3D_S32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_OP<"tex.grad.3d.v4.s32.f32", Int32Regs>;

// u32 results.
def TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D_OP<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_U32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_OP<"tex.level.3d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_3D_U32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_OP<"tex.grad.3d.v4.u32.f32", Int32Regs>;
3118
3119 def TEX_UNIFIED_CUBE_F32_F32
3120   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3121                     Float32Regs:$b, Float32Regs:$a),
3122               (ins Int64Regs:$t,
3123                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3124               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3125               "[$t, \\{$x, $y, $z, $z\\}];",
3126               []>;
// Emits `tex.level.cube.v4.f32.f32`: four Float32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t at float
// coordinates ($x, $y, $z), with an explicit $lod operand after the address.
// $z is repeated to fill the fourth slot of the coordinate vector.  The
// pattern list is empty, so no selection pattern is attached by this def.
3127 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3128   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3129                     Float32Regs:$b, Float32Regs:$a),
3130               (ins Int64Regs:$t,
3131                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3132                    Float32Regs:$lod),
3133               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3134               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3135               []>;
// Emits `tex.cube.v4.s32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through the Int64Regs operand $t at float coordinates ($x, $y, $z).
// $z is repeated to fill the fourth slot of the coordinate vector.  The
// pattern list is empty, so no selection pattern is attached by this def.
3136 def TEX_UNIFIED_CUBE_S32_F32
3137   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3138                     Int32Regs:$b, Int32Regs:$a),
3139               (ins Int64Regs:$t,
3140                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3141               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3142               "[$t, \\{$x, $y, $z, $z\\}];",
3143               []>;
// Emits `tex.level.cube.v4.s32.f32`: four Int32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t at float
// coordinates ($x, $y, $z), with an explicit $lod operand after the address.
// $z is repeated to fill the fourth slot of the coordinate vector.  The
// pattern list is empty, so no selection pattern is attached by this def.
3144 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3145   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3146                     Int32Regs:$b, Int32Regs:$a),
3147               (ins Int64Regs:$t,
3148                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3149                    Float32Regs:$lod),
3150               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3151               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3152               []>;
// Emits `tex.cube.v4.u32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through the Int64Regs operand $t at float coordinates ($x, $y, $z).
// $z is repeated to fill the fourth slot of the coordinate vector.  The
// pattern list is empty, so no selection pattern is attached by this def.
3153 def TEX_UNIFIED_CUBE_U32_F32
3154   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3155                     Int32Regs:$b, Int32Regs:$a),
3156               (ins Int64Regs:$t,
3157                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3158               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3159               "[$t, \\{$x, $y, $z, $z\\}];",
3160               []>;
// Emits `tex.level.cube.v4.u32.f32`: four Int32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t at float
// coordinates ($x, $y, $z), with an explicit $lod operand after the address.
// $z is repeated to fill the fourth slot of the coordinate vector.  The
// pattern list is empty, so no selection pattern is attached by this def.
3161 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3162   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3163                     Int32Regs:$b, Int32Regs:$a),
3164               (ins Int64Regs:$t,
3165                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3166                    Float32Regs:$lod),
3167               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3168               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3169               []>;
3170
// Emits `tex.acube.v4.f32.f32`: four Float32Regs results ($r, $g, $b, $a)
// read through the Int64Regs operand $t.  The address vector is
// {$l, $x, $y, $z}: the Int32Regs operand $l comes first, followed by the
// three float coordinates (no repeated element is needed here).  The
// pattern list is empty, so no selection pattern is attached by this def.
3171 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3172   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3173                     Float32Regs:$b, Float32Regs:$a),
3174               (ins Int64Regs:$t, Int32Regs:$l,
3175                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3176               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3177               "[$t, \\{$l, $x, $y, $z\\}];",
3178               []>;
// Emits `tex.level.acube.v4.f32.f32`: four Float32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t, with an explicit
// $lod operand after the address.  The address vector is {$l, $x, $y, $z}:
// the Int32Regs operand $l comes first, followed by the three float
// coordinates.  The pattern list is empty, so no selection pattern is
// attached by this def.
3179 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3180   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3181                     Float32Regs:$b, Float32Regs:$a),
3182               (ins Int64Regs:$t, Int32Regs:$l,
3183                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3184                    Float32Regs:$lod),
3185               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3186               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3187               []>;
// Emits `tex.acube.v4.s32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through the Int64Regs operand $t.  The address vector is
// {$l, $x, $y, $z}: the Int32Regs operand $l comes first, followed by the
// three float coordinates.  The pattern list is empty, so no selection
// pattern is attached by this def.
3188 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3189   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3190                     Int32Regs:$b, Int32Regs:$a),
3191               (ins Int64Regs:$t, Int32Regs:$l,
3192                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3193               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3194               "[$t, \\{$l, $x, $y, $z\\}];",
3195               []>;
// Emits `tex.level.acube.v4.s32.f32`: four Int32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t, with an explicit
// $lod operand after the address.  The address vector is {$l, $x, $y, $z}:
// the Int32Regs operand $l comes first, followed by the three float
// coordinates.  The pattern list is empty, so no selection pattern is
// attached by this def.
3196 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3197   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3198                     Int32Regs:$b, Int32Regs:$a),
3199               (ins Int64Regs:$t, Int32Regs:$l,
3200                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3201                    Float32Regs:$lod),
3202               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3203               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3204               []>;
// Emits `tex.acube.v4.u32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through the Int64Regs operand $t.  The address vector is
// {$l, $x, $y, $z}: the Int32Regs operand $l comes first, followed by the
// three float coordinates.  The pattern list is empty, so no selection
// pattern is attached by this def.
3205 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3206   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3207                     Int32Regs:$b, Int32Regs:$a),
3208               (ins Int64Regs:$t, Int32Regs:$l,
3209                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3210               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3211               "[$t, \\{$l, $x, $y, $z\\}];",
3212               []>;
// Emits `tex.level.acube.v4.u32.f32`: four Int32Regs results
// ($r, $g, $b, $a) read through the Int64Regs operand $t, with an explicit
// $lod operand after the address.  The address vector is {$l, $x, $y, $z}:
// the Int32Regs operand $l comes first, followed by the three float
// coordinates.  The pattern list is empty, so no selection pattern is
// attached by this def.
3213 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3214   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3215                     Int32Regs:$b, Int32Regs:$a),
3216               (ins Int64Regs:$t, Int32Regs:$l,
3217                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3218                    Float32Regs:$lod),
3219               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3220               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3221               []>;
3222
// Emits `tld4.r.2d.v4.f32.f32` (component selector `.r`): four Float32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3223 def TLD4_UNIFIED_R_2D_F32_F32
3224   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3225                     Float32Regs:$v2, Float32Regs:$v3),
3226               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3227               "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3228               "[$t, \\{$x, $y\\}];",
3229               []>;
// Emits `tld4.g.2d.v4.f32.f32` (component selector `.g`): four Float32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3230 def TLD4_UNIFIED_G_2D_F32_F32
3231   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3232                     Float32Regs:$v2, Float32Regs:$v3),
3233               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3234               "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3235               "[$t, \\{$x, $y\\}];",
3236               []>;
// Emits `tld4.b.2d.v4.f32.f32` (component selector `.b`): four Float32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3237 def TLD4_UNIFIED_B_2D_F32_F32
3238   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3239                     Float32Regs:$v2, Float32Regs:$v3),
3240               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3241               "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3242               "[$t, \\{$x, $y\\}];",
3243               []>;
// Emits `tld4.a.2d.v4.f32.f32` (component selector `.a`): four Float32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3244 def TLD4_UNIFIED_A_2D_F32_F32
3245   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3246                     Float32Regs:$v2, Float32Regs:$v3),
3247               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3248               "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3249               "[$t, \\{$x, $y\\}];",
3250               []>;
// Emits `tld4.r.2d.v4.s32.f32` (component selector `.r`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3251 def TLD4_UNIFIED_R_2D_S32_F32
3252   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3253                     Int32Regs:$v2, Int32Regs:$v3),
3254               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3255               "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3256               "[$t, \\{$x, $y\\}];",
3257               []>;
// Emits `tld4.g.2d.v4.s32.f32` (component selector `.g`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3258 def TLD4_UNIFIED_G_2D_S32_F32
3259   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3260                     Int32Regs:$v2, Int32Regs:$v3),
3261               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3262               "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3263               "[$t, \\{$x, $y\\}];",
3264               []>;
// Emits `tld4.b.2d.v4.s32.f32` (component selector `.b`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3265 def TLD4_UNIFIED_B_2D_S32_F32
3266   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3267                     Int32Regs:$v2, Int32Regs:$v3),
3268               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3269               "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3270               "[$t, \\{$x, $y\\}];",
3271               []>;
// Emits `tld4.a.2d.v4.s32.f32` (component selector `.a`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3272 def TLD4_UNIFIED_A_2D_S32_F32
3273   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3274                     Int32Regs:$v2, Int32Regs:$v3),
3275               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3276               "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3277               "[$t, \\{$x, $y\\}];",
3278               []>;
// Emits `tld4.r.2d.v4.u32.f32` (component selector `.r`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3279 def TLD4_UNIFIED_R_2D_U32_F32
3280   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3281                     Int32Regs:$v2, Int32Regs:$v3),
3282               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3283               "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3284               "[$t, \\{$x, $y\\}];",
3285               []>;
// Emits `tld4.g.2d.v4.u32.f32` (component selector `.g`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3286 def TLD4_UNIFIED_G_2D_U32_F32
3287   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3288                     Int32Regs:$v2, Int32Regs:$v3),
3289               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3290               "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3291               "[$t, \\{$x, $y\\}];",
3292               []>;
// Emits `tld4.b.2d.v4.u32.f32` (component selector `.b`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3293 def TLD4_UNIFIED_B_2D_U32_F32
3294   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3295                     Int32Regs:$v2, Int32Regs:$v3),
3296               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3297               "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3298               "[$t, \\{$x, $y\\}];",
3299               []>;
// Emits `tld4.a.2d.v4.u32.f32` (component selector `.a`): four Int32Regs
// results ($v0..$v3) read through the Int64Regs operand $t at the 2D float
// coordinate {$x, $y}.  The pattern list is empty, so no selection pattern
// is attached by this def.
3300 def TLD4_UNIFIED_A_2D_U32_F32
3301   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3302                     Int32Regs:$v2, Int32Regs:$v3),
3303               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3304               "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3305               "[$t, \\{$x, $y\\}];",
3306               []>;
3307 }
3308
3309
3310
3311 //=== Surface load instructions
3312 // .clamp variant
// Single-result `suld.b.*.clamp` surface loads.  Every def in this group has
// exactly one output register ($r) and is tagged IsSuld = 1.
// NOTE(review): IsSuld looks like a result-count class (1 here, 2 on the
// .v2 group, 3 on the .v4 group) -- confirm against the NVPTXInst definition.
// Register class by element size: b8 and b16 both write Int16Regs, b32
// writes Int32Regs, b64 writes Int64Regs.  $s is the Int64Regs operand used
// as the base of the address and all coordinates are Int32Regs.  Every
// pattern list is empty, so no selection pattern is attached by these defs.
3313 let IsSuld = 1 in {
// 1D (`.1d`): coordinate vector is {$x}.
3314 def SULD_1D_I8_CLAMP
3315   : NVPTXInst<(outs Int16Regs:$r),
3316               (ins Int64Regs:$s, Int32Regs:$x),
3317               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3318               []>;
3319 def SULD_1D_I16_CLAMP
3320   : NVPTXInst<(outs Int16Regs:$r),
3321               (ins Int64Regs:$s, Int32Regs:$x),
3322               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3323               []>;
3324 def SULD_1D_I32_CLAMP
3325   : NVPTXInst<(outs Int32Regs:$r),
3326               (ins Int64Regs:$s, Int32Regs:$x),
3327               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3328               []>;
3329 def SULD_1D_I64_CLAMP
3330   : NVPTXInst<(outs Int64Regs:$r),
3331               (ins Int64Regs:$s, Int32Regs:$x),
3332               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3333               []>;
3334
// 1D array (`.a1d`): coordinate vector is {$l, $x}, with $l first.
3335 def SULD_1D_ARRAY_I8_CLAMP
3336   : NVPTXInst<(outs Int16Regs:$r),
3337               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3338               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3339               []>;
3340 def SULD_1D_ARRAY_I16_CLAMP
3341   : NVPTXInst<(outs Int16Regs:$r),
3342               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3343               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3344               []>;
3345 def SULD_1D_ARRAY_I32_CLAMP
3346   : NVPTXInst<(outs Int32Regs:$r),
3347               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3348               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3349               []>;
3350 def SULD_1D_ARRAY_I64_CLAMP
3351   : NVPTXInst<(outs Int64Regs:$r),
3352               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3353               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3354               []>;
3355
// 2D (`.2d`): coordinate vector is {$x, $y}.
3356 def SULD_2D_I8_CLAMP
3357   : NVPTXInst<(outs Int16Regs:$r),
3358               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3359               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3360               []>;
3361 def SULD_2D_I16_CLAMP
3362   : NVPTXInst<(outs Int16Regs:$r),
3363               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3364               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3365               []>;
3366 def SULD_2D_I32_CLAMP
3367   : NVPTXInst<(outs Int32Regs:$r),
3368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3369               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3370               []>;
3371 def SULD_2D_I64_CLAMP
3372   : NVPTXInst<(outs Int64Regs:$r),
3373               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3374               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3375               []>;
3376
// 2D array (`.a2d`): coordinate vector is written with four slots,
// {$l, $x, $y, $y}; $y is repeated to fill the fourth.
3377 def SULD_2D_ARRAY_I8_CLAMP
3378   : NVPTXInst<(outs Int16Regs:$r),
3379               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3380               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3381               []>;
3382 def SULD_2D_ARRAY_I16_CLAMP
3383   : NVPTXInst<(outs Int16Regs:$r),
3384               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3385               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3386               []>;
3387 def SULD_2D_ARRAY_I32_CLAMP
3388   : NVPTXInst<(outs Int32Regs:$r),
3389               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3390               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3391               []>;
3392 def SULD_2D_ARRAY_I64_CLAMP
3393   : NVPTXInst<(outs Int64Regs:$r),
3394               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3395               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3396               []>;
3397
// 3D (`.3d`): coordinate vector is written with four slots,
// {$x, $y, $z, $z}; $z is repeated to fill the fourth.
3398 def SULD_3D_I8_CLAMP
3399   : NVPTXInst<(outs Int16Regs:$r),
3400               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3401               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3402               []>;
3403 def SULD_3D_I16_CLAMP
3404   : NVPTXInst<(outs Int16Regs:$r),
3405               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3406               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3407               []>;
3408 def SULD_3D_I32_CLAMP
3409   : NVPTXInst<(outs Int32Regs:$r),
3410               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3411               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3412               []>;
3413 def SULD_3D_I64_CLAMP
3414   : NVPTXInst<(outs Int64Regs:$r),
3415               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3416               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3417               []>;
3418 }
3419
// Two-result (`.v2`) `suld.b.*.clamp` surface loads.  Every def in this
// group has two output registers ($r, $g) and is tagged IsSuld = 2.
// NOTE(review): IsSuld looks like a result-count class (1 on the scalar
// group, 2 here, 3 on the .v4 group) -- confirm against the NVPTXInst
// definition.
// Register class by element size: b8 and b16 both write Int16Regs, b32
// writes Int32Regs, b64 writes Int64Regs.  $s is the Int64Regs operand used
// as the base of the address and all coordinates are Int32Regs.  Every
// pattern list is empty, so no selection pattern is attached by these defs.
3420 let IsSuld = 2 in {
// 1D (`.1d`): coordinate vector is {$x}.
3421 def SULD_1D_V2I8_CLAMP
3422   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3423               (ins Int64Regs:$s, Int32Regs:$x),
3424               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3425               []>;
3426 def SULD_1D_V2I16_CLAMP
3427   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3428               (ins Int64Regs:$s, Int32Regs:$x),
3429               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3430               []>;
3431 def SULD_1D_V2I32_CLAMP
3432   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3433               (ins Int64Regs:$s, Int32Regs:$x),
3434               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3435               []>;
3436 def SULD_1D_V2I64_CLAMP
3437   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3438               (ins Int64Regs:$s, Int32Regs:$x),
3439               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3440               []>;
3441
// 1D array (`.a1d`): coordinate vector is {$l, $x}, with $l first.
3442 def SULD_1D_ARRAY_V2I8_CLAMP
3443   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3444               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3445               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3446               []>;
3447 def SULD_1D_ARRAY_V2I16_CLAMP
3448   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3449               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3450               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3451               []>;
3452 def SULD_1D_ARRAY_V2I32_CLAMP
3453   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3454               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3455               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3456               []>;
3457 def SULD_1D_ARRAY_V2I64_CLAMP
3458   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3459               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3460               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3461               []>;
3462
// 2D (`.2d`): coordinate vector is {$x, $y}.
3463 def SULD_2D_V2I8_CLAMP
3464   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3465               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3466               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3467               []>;
3468 def SULD_2D_V2I16_CLAMP
3469   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3470               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3471               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3472               []>;
3473 def SULD_2D_V2I32_CLAMP
3474   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3475               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3476               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3477               []>;
3478 def SULD_2D_V2I64_CLAMP
3479   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3480               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3481               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3482               []>;
3483
// 2D array (`.a2d`): coordinate vector is written with four slots,
// {$l, $x, $y, $y}; $y is repeated to fill the fourth.
3484 def SULD_2D_ARRAY_V2I8_CLAMP
3485   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3486               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3487               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3488               "[$s, \\{$l, $x, $y, $y\\}];",
3489               []>;
3490 def SULD_2D_ARRAY_V2I16_CLAMP
3491   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3492               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3493               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3494               "[$s, \\{$l, $x, $y, $y\\}];",
3495               []>;
3496 def SULD_2D_ARRAY_V2I32_CLAMP
3497   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3498               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3499               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3500               "[$s, \\{$l, $x, $y, $y\\}];",
3501               []>;
3502 def SULD_2D_ARRAY_V2I64_CLAMP
3503   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3504               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3505               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3506               "[$s, \\{$l, $x, $y, $y\\}];",
3507               []>;
3508
// 3D (`.3d`): coordinate vector is written with four slots,
// {$x, $y, $z, $z}; $z is repeated to fill the fourth.
3509 def SULD_3D_V2I8_CLAMP
3510   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3511               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3512               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3513               []>;
3514 def SULD_3D_V2I16_CLAMP
3515   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3516               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3517               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3518               []>;
3519 def SULD_3D_V2I32_CLAMP
3520   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3521               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3522               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3523               []>;
3524 def SULD_3D_V2I64_CLAMP
3525   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3526               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3527               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3528               []>;
3529 }
3530
// Four-result (`.v4`) `suld.b.*.clamp` surface loads.  Every def in this
// group has four output registers ($r, $g, $b, $a) and is tagged
// IsSuld = 3.
// NOTE(review): IsSuld looks like a result-count class (1 on the scalar
// group, 2 on the .v2 group, 3 here) -- confirm against the NVPTXInst
// definition.
// Only b8, b16 and b32 element sizes are defined in this group (there is no
// V4I64 def here).  b8 and b16 both write Int16Regs; b32 writes Int32Regs.
// $s is the Int64Regs operand used as the base of the address and all
// coordinates are Int32Regs.  Every pattern list is empty, so no selection
// pattern is attached by these defs.
3531 let IsSuld = 3 in {
// 1D (`.1d`): coordinate vector is {$x}.
3532 def SULD_1D_V4I8_CLAMP
3533   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3534               (ins Int64Regs:$s, Int32Regs:$x),
3535               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3536               []>;
3537 def SULD_1D_V4I16_CLAMP
3538   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3539               (ins Int64Regs:$s, Int32Regs:$x),
3540               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3541               []>;
3542 def SULD_1D_V4I32_CLAMP
3543   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3544               (ins Int64Regs:$s, Int32Regs:$x),
3545               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3546               []>;
3547
// 1D array (`.a1d`): coordinate vector is {$l, $x}, with $l first.
3548 def SULD_1D_ARRAY_V4I8_CLAMP
3549   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3550               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3551               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3552               "[$s, \\{$l, $x\\}];",
3553               []>;
3554 def SULD_1D_ARRAY_V4I16_CLAMP
3555   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3556               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3557               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3558               "[$s, \\{$l, $x\\}];",
3559               []>;
3560 def SULD_1D_ARRAY_V4I32_CLAMP
3561   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3562               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3563               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3564               "[$s, \\{$l, $x\\}];",
3565               []>;
3566
// 2D (`.2d`): coordinate vector is {$x, $y}.
3567 def SULD_2D_V4I8_CLAMP
3568   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3569               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3570               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3571               []>;
3572 def SULD_2D_V4I16_CLAMP
3573   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3574               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3575               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3576               []>;
3577 def SULD_2D_V4I32_CLAMP
3578   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3579               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3580               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3581               []>;
3582
// 2D array (`.a2d`): coordinate vector is written with four slots,
// {$l, $x, $y, $y}; $y is repeated to fill the fourth.
3583 def SULD_2D_ARRAY_V4I8_CLAMP
3584   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3585               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3586               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3587               "[$s, \\{$l, $x, $y, $y\\}];",
3588               []>;
3589 def SULD_2D_ARRAY_V4I16_CLAMP
3590   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3591               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3592               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3593               "[$s, \\{$l, $x, $y, $y\\}];",
3594               []>;
3595 def SULD_2D_ARRAY_V4I32_CLAMP
3596   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3597               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3598               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3599               "[$s, \\{$l, $x, $y, $y\\}];",
3600               []>;
3601
3602
// 3D (`.3d`): coordinate vector is written with four slots,
// {$x, $y, $z, $z}; $z is repeated to fill the fourth.
3603 def SULD_3D_V4I8_CLAMP
3604   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3605               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3606               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3607               "[$s, \\{$x, $y, $z, $z\\}];",
3608               []>;
3609 def SULD_3D_V4I16_CLAMP
3610   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3611               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3612               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3613               "[$s, \\{$x, $y, $z, $z\\}];",
3614               []>;
3615 def SULD_3D_V4I32_CLAMP
3616   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3617               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3618               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3619               "[$s, \\{$x, $y, $z, $z\\}];",
3620               []>;
3621 }
3622
3623
3624 // .trap variant
// Single-result `suld.b.*.trap` surface loads.  Every def in this group has
// exactly one output register ($r) and is tagged IsSuld = 1.  The defs
// mirror the `.clamp` scalar group operand-for-operand; only the suffix in
// the asm string differs.
// NOTE(review): IsSuld looks like a result-count class (1 here, 2 on the
// .v2 group, 3 on the .v4 group) -- confirm against the NVPTXInst definition.
// Register class by element size: b8 and b16 both write Int16Regs, b32
// writes Int32Regs, b64 writes Int64Regs.  $s is the Int64Regs operand used
// as the base of the address and all coordinates are Int32Regs.  Every
// pattern list is empty, so no selection pattern is attached by these defs.
3625 let IsSuld = 1 in {
// 1D (`.1d`): coordinate vector is {$x}.
3626 def SULD_1D_I8_TRAP
3627   : NVPTXInst<(outs Int16Regs:$r),
3628               (ins Int64Regs:$s, Int32Regs:$x),
3629               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3630               []>;
3631 def SULD_1D_I16_TRAP
3632   : NVPTXInst<(outs Int16Regs:$r),
3633               (ins Int64Regs:$s, Int32Regs:$x),
3634               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3635               []>;
3636 def SULD_1D_I32_TRAP
3637   : NVPTXInst<(outs Int32Regs:$r),
3638               (ins Int64Regs:$s, Int32Regs:$x),
3639               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3640               []>;
3641 def SULD_1D_I64_TRAP
3642   : NVPTXInst<(outs Int64Regs:$r),
3643               (ins Int64Regs:$s, Int32Regs:$x),
3644               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3645               []>;
3646
// 1D array (`.a1d`): coordinate vector is {$l, $x}, with $l first.
3647 def SULD_1D_ARRAY_I8_TRAP
3648   : NVPTXInst<(outs Int16Regs:$r),
3649               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3650               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3651               []>;
3652 def SULD_1D_ARRAY_I16_TRAP
3653   : NVPTXInst<(outs Int16Regs:$r),
3654               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3655               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3656               []>;
3657 def SULD_1D_ARRAY_I32_TRAP
3658   : NVPTXInst<(outs Int32Regs:$r),
3659               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3660               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3661               []>;
3662 def SULD_1D_ARRAY_I64_TRAP
3663   : NVPTXInst<(outs Int64Regs:$r),
3664               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3665               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3666               []>;
3667
// 2D (`.2d`): coordinate vector is {$x, $y}.
3668 def SULD_2D_I8_TRAP
3669   : NVPTXInst<(outs Int16Regs:$r),
3670               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3671               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3672               []>;
3673 def SULD_2D_I16_TRAP
3674   : NVPTXInst<(outs Int16Regs:$r),
3675               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3676               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3677               []>;
3678 def SULD_2D_I32_TRAP
3679   : NVPTXInst<(outs Int32Regs:$r),
3680               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3681               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3682               []>;
3683 def SULD_2D_I64_TRAP
3684   : NVPTXInst<(outs Int64Regs:$r),
3685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3686               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3687               []>;
3688
// 2D array (`.a2d`): coordinate vector is written with four slots,
// {$l, $x, $y, $y}; $y is repeated to fill the fourth.
3689 def SULD_2D_ARRAY_I8_TRAP
3690   : NVPTXInst<(outs Int16Regs:$r),
3691               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3692               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3693               []>;
3694 def SULD_2D_ARRAY_I16_TRAP
3695   : NVPTXInst<(outs Int16Regs:$r),
3696               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3697               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3698               []>;
3699 def SULD_2D_ARRAY_I32_TRAP
3700   : NVPTXInst<(outs Int32Regs:$r),
3701               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3702               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3703               []>;
3704 def SULD_2D_ARRAY_I64_TRAP
3705   : NVPTXInst<(outs Int64Regs:$r),
3706               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3707               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3708               []>;
3709
// 3D (`.3d`): coordinate vector is written with four slots,
// {$x, $y, $z, $z}; $z is repeated to fill the fourth.
3710 def SULD_3D_I8_TRAP
3711   : NVPTXInst<(outs Int16Regs:$r),
3712               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3713               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3714               []>;
3715 def SULD_3D_I16_TRAP
3716   : NVPTXInst<(outs Int16Regs:$r),
3717               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3718               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3719               []>;
3720 def SULD_3D_I32_TRAP
3721   : NVPTXInst<(outs Int32Regs:$r),
3722               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3723               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3724               []>;
3725 def SULD_3D_I64_TRAP
3726   : NVPTXInst<(outs Int64Regs:$r),
3727               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3728               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3729               []>;
3730 }
3731
3732 let IsSuld = 2 in {
3733 def SULD_1D_V2I8_TRAP
3734   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3735               (ins Int64Regs:$s, Int32Regs:$x),
3736               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3737               []>;
3738 def SULD_1D_V2I16_TRAP
3739   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3740               (ins Int64Regs:$s, Int32Regs:$x),
3741               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3742               []>;
3743 def SULD_1D_V2I32_TRAP
3744   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3745               (ins Int64Regs:$s, Int32Regs:$x),
3746               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3747               []>;
3748 def SULD_1D_V2I64_TRAP
3749   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3750               (ins Int64Regs:$s, Int32Regs:$x),
3751               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3752               []>;
3753
3754 def SULD_1D_ARRAY_V2I8_TRAP
3755   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3756               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3757               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3758               []>;
3759 def SULD_1D_ARRAY_V2I16_TRAP
3760   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3761               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3762               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3763               []>;
3764 def SULD_1D_ARRAY_V2I32_TRAP
3765   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3766               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3767               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3768               []>;
3769 def SULD_1D_ARRAY_V2I64_TRAP
3770   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3771               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3772               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3773               []>;
3774
3775 def SULD_2D_V2I8_TRAP
3776   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3777               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3778               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3779               []>;
3780 def SULD_2D_V2I16_TRAP
3781   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3782               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3783               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3784               []>;
3785 def SULD_2D_V2I32_TRAP
3786   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3787               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3788               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3789               []>;
3790 def SULD_2D_V2I64_TRAP
3791   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3792               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3793               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3794               []>;
3795
// Two-result (v2) 2D-array "suld.b.a2d" records, .trap variant, one per
// element width (b8/b16/b32/b64). Inputs: $s (Int64Regs) and the Int32Regs
// operands $l, $x, $y; results go to $r and $g (b8 uses Int16Regs).
// The coordinate vector is deliberately printed with four elements,
// {$l, $x, $y, $y}: the last slot simply repeats $y.
// NOTE(review): this matches PTX requiring a four-element coordinate vector
// for a2d with the final element ignored -- confirm against the PTX ISA.
// The asm string is split across two adjacent literals only to keep the
// source lines short.
3796 def SULD_2D_ARRAY_V2I8_TRAP
3797   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3798               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3799               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
3800               "[$s, \\{$l, $x, $y, $y\\}];",
3801               []>;
3802 def SULD_2D_ARRAY_V2I16_TRAP
3803   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3804               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3805               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
3806               "[$s, \\{$l, $x, $y, $y\\}];",
3807               []>;
3808 def SULD_2D_ARRAY_V2I32_TRAP
3809   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3810               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3811               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
3812               "[$s, \\{$l, $x, $y, $y\\}];",
3813               []>;
3814 def SULD_2D_ARRAY_V2I64_TRAP
3815   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3816               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3817               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
3818               "[$s, \\{$l, $x, $y, $y\\}];",
3819               []>;
3820
// Two-result (v2) 3D "suld.b.3d" records, .trap variant, one per element
// width (b8/b16/b32/b64). Inputs: $s (Int64Regs) and the Int32Regs
// coordinates $x, $y, $z; results go to $r and $g (b8 uses Int16Regs).
// The coordinate vector is deliberately printed with four elements,
// {$x, $y, $z, $z}: the last slot simply repeats $z.
// NOTE(review): this matches PTX requiring a four-element coordinate vector
// for 3d with the final element ignored -- confirm against the PTX ISA.
3821 def SULD_3D_V2I8_TRAP
3822   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3823               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3824               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3825               []>;
3826 def SULD_3D_V2I16_TRAP
3827   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3828               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3829               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3830               []>;
3831 def SULD_3D_V2I32_TRAP
3832   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3833               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3834               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3835               []>;
3836 def SULD_3D_V2I64_TRAP
3837   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3838               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3839               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3840               []>;
3841 }
3842
// Four-result (v4) "suld.b" records, .trap variant. Every record in this
// scope gets IsSuld = 3 (the scalar and v2 scopes in this file use 1 and 2
// respectively; the consumer of the field is not visible in this section).
//
// Layout of each record:
//   outs : $r, $g, $b, $a -- the four loaded components. b8 and b16 forms
//          both use Int16Regs, b32 forms use Int32Regs. There is no 64-bit
//          v4 form in this scope.
//   ins  : $s (Int64Regs) followed by Int32Regs coordinates; array
//          geometries (a1d/a2d) take an extra leading $l operand.
//          NOTE(review): $l is presumably the array layer index -- confirm.
//   asm  : a2d and 3d forms print a four-element coordinate vector and fill
//          the last slot by repeating $y / $z.
//          NOTE(review): this matches PTX requiring a four-element vector
//          there with the final element ignored -- confirm against the PTX ISA.
//   pats : empty ([]), so these records carry no selection pattern.
3843 let IsSuld = 3 in {
3844 def SULD_1D_V4I8_TRAP
3845   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3846               (ins Int64Regs:$s, Int32Regs:$x),
3847               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3848               []>;
3849 def SULD_1D_V4I16_TRAP
3850   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3851               (ins Int64Regs:$s, Int32Regs:$x),
3852               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3853               []>;
3854 def SULD_1D_V4I32_TRAP
3855   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3856               (ins Int64Regs:$s, Int32Regs:$x),
3857               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3858               []>;
3859
3860 def SULD_1D_ARRAY_V4I8_TRAP
3861   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3862               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3863               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3864               "[$s, \\{$l, $x\\}];",
3865               []>;
3866 def SULD_1D_ARRAY_V4I16_TRAP
3867   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3868               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3869               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3870               "[$s, \\{$l, $x\\}];",
3871               []>;
3872 def SULD_1D_ARRAY_V4I32_TRAP
3873   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3874               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3875               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3876               "[$s, \\{$l, $x\\}];",
3877               []>;
3878
3879 def SULD_2D_V4I8_TRAP
3880   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3881               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3882               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3883               []>;
3884 def SULD_2D_V4I16_TRAP
3885   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3886               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3887               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3888               []>;
3889 def SULD_2D_V4I32_TRAP
3890   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3891               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3892               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3893               []>;
3894
3895 def SULD_2D_ARRAY_V4I8_TRAP
3896   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3897               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3898               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3899               "[$s, \\{$l, $x, $y, $y\\}];",
3900               []>;
3901 def SULD_2D_ARRAY_V4I16_TRAP
3902   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3903               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3904               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3905               "[$s, \\{$l, $x, $y, $y\\}];",
3906               []>;
3907 def SULD_2D_ARRAY_V4I32_TRAP
3908   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3909               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3910               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3911               "[$s, \\{$l, $x, $y, $y\\}];",
3912               []>;
3913
3914
3915 def SULD_3D_V4I8_TRAP
3916   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3918               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3919               "[$s, \\{$x, $y, $z, $z\\}];",
3920               []>;
3921 def SULD_3D_V4I16_TRAP
3922   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3923               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3924               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3925               "[$s, \\{$x, $y, $z, $z\\}];",
3926               []>;
3927 def SULD_3D_V4I32_TRAP
3928   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3929               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3930               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3931               "[$s, \\{$x, $y, $z, $z\\}];",
3932               []>;
3933 }
3934
3935 // .zero variant
// Single-result "suld.b" records, .zero variant. Every record in this scope
// gets IsSuld = 1 (the v2 and v4 scopes in this file use 2 and 3; the
// consumer of the field is not visible in this section).
//
// Layout of each record:
//   outs : $r -- the loaded value. b8 and b16 forms both use Int16Regs,
//          b32 uses Int32Regs, b64 uses Int64Regs.
//   ins  : $s (Int64Regs) followed by Int32Regs coordinates; array
//          geometries (a1d/a2d) take an extra leading $l operand.
//          NOTE(review): $l is presumably the array layer index -- confirm.
//   asm  : a2d and 3d forms print a four-element coordinate vector and fill
//          the last slot by repeating $y / $z.
//          NOTE(review): this matches PTX requiring a four-element vector
//          there with the final element ignored -- confirm against the PTX ISA.
//   pats : empty ([]), so these records carry no selection pattern.
3936 let IsSuld = 1 in {
3937 def SULD_1D_I8_ZERO
3938   : NVPTXInst<(outs Int16Regs:$r),
3939               (ins Int64Regs:$s, Int32Regs:$x),
3940               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
3941               []>;
3942 def SULD_1D_I16_ZERO
3943   : NVPTXInst<(outs Int16Regs:$r),
3944               (ins Int64Regs:$s, Int32Regs:$x),
3945               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
3946               []>;
3947 def SULD_1D_I32_ZERO
3948   : NVPTXInst<(outs Int32Regs:$r),
3949               (ins Int64Regs:$s, Int32Regs:$x),
3950               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
3951               []>;
3952 def SULD_1D_I64_ZERO
3953   : NVPTXInst<(outs Int64Regs:$r),
3954               (ins Int64Regs:$s, Int32Regs:$x),
3955               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
3956               []>;
3957
3958 def SULD_1D_ARRAY_I8_ZERO
3959   : NVPTXInst<(outs Int16Regs:$r),
3960               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3961               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3962               []>;
3963 def SULD_1D_ARRAY_I16_ZERO
3964   : NVPTXInst<(outs Int16Regs:$r),
3965               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3966               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3967               []>;
3968 def SULD_1D_ARRAY_I32_ZERO
3969   : NVPTXInst<(outs Int32Regs:$r),
3970               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3971               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3972               []>;
3973 def SULD_1D_ARRAY_I64_ZERO
3974   : NVPTXInst<(outs Int64Regs:$r),
3975               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3976               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3977               []>;
3978
3979 def SULD_2D_I8_ZERO
3980   : NVPTXInst<(outs Int16Regs:$r),
3981               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3982               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3983               []>;
3984 def SULD_2D_I16_ZERO
3985   : NVPTXInst<(outs Int16Regs:$r),
3986               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3987               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3988               []>;
3989 def SULD_2D_I32_ZERO
3990   : NVPTXInst<(outs Int32Regs:$r),
3991               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3992               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3993               []>;
3994 def SULD_2D_I64_ZERO
3995   : NVPTXInst<(outs Int64Regs:$r),
3996               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3997               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3998               []>;
3999
4000 def SULD_2D_ARRAY_I8_ZERO
4001   : NVPTXInst<(outs Int16Regs:$r),
4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4003               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4004               []>;
4005 def SULD_2D_ARRAY_I16_ZERO
4006   : NVPTXInst<(outs Int16Regs:$r),
4007               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4008               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4009               []>;
4010 def SULD_2D_ARRAY_I32_ZERO
4011   : NVPTXInst<(outs Int32Regs:$r),
4012               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4013               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4014               []>;
4015 def SULD_2D_ARRAY_I64_ZERO
4016   : NVPTXInst<(outs Int64Regs:$r),
4017               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4018               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4019               []>;
4020
4021 def SULD_3D_I8_ZERO
4022   : NVPTXInst<(outs Int16Regs:$r),
4023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4024               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4025               []>;
4026 def SULD_3D_I16_ZERO
4027   : NVPTXInst<(outs Int16Regs:$r),
4028               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4029               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4030               []>;
4031 def SULD_3D_I32_ZERO
4032   : NVPTXInst<(outs Int32Regs:$r),
4033               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4034               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4035               []>;
4036 def SULD_3D_I64_ZERO
4037   : NVPTXInst<(outs Int64Regs:$r),
4038               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4039               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4040               []>;
4041 }
4042
// Two-result (v2) "suld.b" records, .zero variant. Every record in this
// scope gets IsSuld = 2 (the scalar and v4 scopes in this file use 1 and 3;
// the consumer of the field is not visible in this section).
//
// Layout of each record:
//   outs : $r, $g -- the two loaded components. b8 and b16 forms both use
//          Int16Regs, b32 uses Int32Regs, b64 uses Int64Regs.
//   ins  : $s (Int64Regs) followed by Int32Regs coordinates; array
//          geometries (a1d/a2d) take an extra leading $l operand.
//          NOTE(review): $l is presumably the array layer index -- confirm.
//   asm  : a2d and 3d forms print a four-element coordinate vector and fill
//          the last slot by repeating $y / $z.
//          NOTE(review): this matches PTX requiring a four-element vector
//          there with the final element ignored -- confirm against the PTX ISA.
//   pats : empty ([]), so these records carry no selection pattern.
4043 let IsSuld = 2 in {
4044 def SULD_1D_V2I8_ZERO
4045   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4046               (ins Int64Regs:$s, Int32Regs:$x),
4047               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4048               []>;
4049 def SULD_1D_V2I16_ZERO
4050   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4051               (ins Int64Regs:$s, Int32Regs:$x),
4052               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4053               []>;
4054 def SULD_1D_V2I32_ZERO
4055   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4056               (ins Int64Regs:$s, Int32Regs:$x),
4057               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4058               []>;
4059 def SULD_1D_V2I64_ZERO
4060   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4061               (ins Int64Regs:$s, Int32Regs:$x),
4062               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4063               []>;
4064
4065 def SULD_1D_ARRAY_V2I8_ZERO
4066   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4067               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4068               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4069               []>;
4070 def SULD_1D_ARRAY_V2I16_ZERO
4071   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4072               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4073               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4074               []>;
4075 def SULD_1D_ARRAY_V2I32_ZERO
4076   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4077               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4078               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4079               []>;
4080 def SULD_1D_ARRAY_V2I64_ZERO
4081   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4082               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4083               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4084               []>;
4085
4086 def SULD_2D_V2I8_ZERO
4087   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4088               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4089               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4090               []>;
4091 def SULD_2D_V2I16_ZERO
4092   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4093               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4094               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4095               []>;
4096 def SULD_2D_V2I32_ZERO
4097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4098               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4099               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4100               []>;
4101 def SULD_2D_V2I64_ZERO
4102   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4103               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4104               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4105               []>;
4106
4107 def SULD_2D_ARRAY_V2I8_ZERO
4108   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4110               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4111               "[$s, \\{$l, $x, $y, $y\\}];",
4112               []>;
4113 def SULD_2D_ARRAY_V2I16_ZERO
4114   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4115               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4116               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4117               "[$s, \\{$l, $x, $y, $y\\}];",
4118               []>;
4119 def SULD_2D_ARRAY_V2I32_ZERO
4120   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4121               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4122               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4123               "[$s, \\{$l, $x, $y, $y\\}];",
4124               []>;
4125 def SULD_2D_ARRAY_V2I64_ZERO
4126   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4127               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4128               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4129               "[$s, \\{$l, $x, $y, $y\\}];",
4130               []>;
4131
4132 def SULD_3D_V2I8_ZERO
4133   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4134               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4135               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4136               []>;
4137 def SULD_3D_V2I16_ZERO
4138   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4139               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4140               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4141               []>;
4142 def SULD_3D_V2I32_ZERO
4143   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4144               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4145               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4146               []>;
4147 def SULD_3D_V2I64_ZERO
4148   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4149               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4150               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4151               []>;
4152 }
4153
// Four-result (v4) "suld.b" records, .zero variant. Every record in this
// scope gets IsSuld = 3 (the scalar and v2 scopes in this file use 1 and 2;
// the consumer of the field is not visible in this section).
//
// Layout of each record:
//   outs : $r, $g, $b, $a -- the four loaded components. b8 and b16 forms
//          both use Int16Regs, b32 forms use Int32Regs. There is no 64-bit
//          v4 form in this scope.
//   ins  : $s (Int64Regs) followed by Int32Regs coordinates; array
//          geometries (a1d/a2d) take an extra leading $l operand.
//          NOTE(review): $l is presumably the array layer index -- confirm.
//   asm  : a2d and 3d forms print a four-element coordinate vector and fill
//          the last slot by repeating $y / $z.
//          NOTE(review): this matches PTX requiring a four-element vector
//          there with the final element ignored -- confirm against the PTX ISA.
//   pats : empty ([]), so these records carry no selection pattern.
4154 let IsSuld = 3 in {
4155 def SULD_1D_V4I8_ZERO
4156   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4157               (ins Int64Regs:$s, Int32Regs:$x),
4158               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4159               []>;
4160 def SULD_1D_V4I16_ZERO
4161   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4162               (ins Int64Regs:$s, Int32Regs:$x),
4163               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4164               []>;
4165 def SULD_1D_V4I32_ZERO
4166   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4167               (ins Int64Regs:$s, Int32Regs:$x),
4168               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4169               []>;
4170
4171 def SULD_1D_ARRAY_V4I8_ZERO
4172   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4173               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4174               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4175               "[$s, \\{$l, $x\\}];",
4176               []>;
4177 def SULD_1D_ARRAY_V4I16_ZERO
4178   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4179               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4180               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4181               "[$s, \\{$l, $x\\}];",
4182               []>;
4183 def SULD_1D_ARRAY_V4I32_ZERO
4184   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4185               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4186               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4187               "[$s, \\{$l, $x\\}];",
4188               []>;
4189
4190 def SULD_2D_V4I8_ZERO
4191   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4192               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4193               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4194               []>;
4195 def SULD_2D_V4I16_ZERO
4196   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4197               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4198               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4199               []>;
4200 def SULD_2D_V4I32_ZERO
4201   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4202               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4203               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4204               []>;
4205
4206 def SULD_2D_ARRAY_V4I8_ZERO
4207   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4208               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4209               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4210               "[$s, \\{$l, $x, $y, $y\\}];",
4211               []>;
4212 def SULD_2D_ARRAY_V4I16_ZERO
4213   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4214               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4215               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4216               "[$s, \\{$l, $x, $y, $y\\}];",
4217               []>;
4218 def SULD_2D_ARRAY_V4I32_ZERO
4219   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4220               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4221               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4222               "[$s, \\{$l, $x, $y, $y\\}];",
4223               []>;
4224
4225
4226 def SULD_3D_V4I8_ZERO
4227   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4228               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4229               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4230               "[$s, \\{$x, $y, $z, $z\\}];",
4231               []>;
4232 def SULD_3D_V4I16_ZERO
4233   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4234               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4235               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4236               "[$s, \\{$x, $y, $z, $z\\}];",
4237               []>;
4238 def SULD_3D_V4I32_ZERO
4239   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4240               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4241               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4242               "[$s, \\{$x, $y, $z, $z\\}];",
4243               []>;
4244 }
4245
4246 //-----------------------------------
4247 // Texture Query Intrinsics
4248 //-----------------------------------
4249
// "txq" query records. Each one takes a single Int64Regs operand $a, printed
// as the bracketed address [$a], and writes a 32-bit result to $d
// (Int32Regs). The attribute being queried is encoded only in the mnemonic
// (channel_order, channel_data_type, width, height, depth, array_size,
// num_samples, num_mipmap_levels). All records in the scope get
// IsSurfTexQuery = 1.
// The pattern lists are empty; the intrinsic-to-instruction mapping is
// supplied by the separate `def : Pat` records that follow this scope.
4250 let IsSurfTexQuery = 1 in {
4251 def TXQ_CHANNEL_ORDER
4252   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4253               "txq.channel_order.b32 \t$d, [$a];",
4254               []>;
4255 def TXQ_CHANNEL_DATA_TYPE
4256   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4257               "txq.channel_data_type.b32 \t$d, [$a];",
4258               []>;
4259 def TXQ_WIDTH
4260   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4261               "txq.width.b32 \t$d, [$a];",
4262               []>;
4263 def TXQ_HEIGHT
4264   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4265               "txq.height.b32 \t$d, [$a];",
4266               []>;
4267 def TXQ_DEPTH
4268   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4269               "txq.depth.b32 \t$d, [$a];",
4270               []>;
4271 def TXQ_ARRAY_SIZE
4272   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4273               "txq.array_size.b32 \t$d, [$a];",
4274               []>;
4275 def TXQ_NUM_SAMPLES
4276   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4277               "txq.num_samples.b32 \t$d, [$a];",
4278               []>;
4279 def TXQ_NUM_MIPMAP_LEVELS
4280   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4281               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4282               []>;
4283 }
4284
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to an Int64Regs operand is rewritten one-to-one to the TXQ_*
// instruction of the same name, passing $a through unchanged.
4285 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4286           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4287 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4288           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4289 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4290           (TXQ_WIDTH Int64Regs:$a)>;
4291 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4292           (TXQ_HEIGHT Int64Regs:$a)>;
4293 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4294           (TXQ_DEPTH Int64Regs:$a)>;
4295 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4296           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4297 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4298           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4299 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4300           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4301
4302
4303 //-----------------------------------
4304 // Surface Query Intrinsics
4305 //-----------------------------------
4306
// "suq" query records. Each one takes a single Int64Regs operand $a, printed
// as the bracketed address [$a], and writes a 32-bit result to $d
// (Int32Regs). The attribute being queried is encoded only in the mnemonic
// (channel_order, channel_data_type, width, height, depth, array_size).
// All records in the scope get IsSurfTexQuery = 1.
// The pattern lists are empty; the intrinsic-to-instruction mapping is
// supplied by the separate `def : Pat` records that follow this scope.
4307 let IsSurfTexQuery = 1 in {
4308 def SUQ_CHANNEL_ORDER
4309   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4310               "suq.channel_order.b32 \t$d, [$a];",
4311               []>;
4312 def SUQ_CHANNEL_DATA_TYPE
4313   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4314               "suq.channel_data_type.b32 \t$d, [$a];",
4315               []>;
4316 def SUQ_WIDTH
4317   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4318               "suq.width.b32 \t$d, [$a];",
4319               []>;
4320 def SUQ_HEIGHT
4321   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4322               "suq.height.b32 \t$d, [$a];",
4323               []>;
4324 def SUQ_DEPTH
4325   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4326               "suq.depth.b32 \t$d, [$a];",
4327               []>;
4328 def SUQ_ARRAY_SIZE
4329   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4330               "suq.array_size.b32 \t$d, [$a];",
4331               []>;
4332 }
4333
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to an Int64Regs operand is rewritten one-to-one to the SUQ_*
// instruction of the same name, passing $a through unchanged.
4334 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4335           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4336 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4337           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4338 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4339           (SUQ_WIDTH Int64Regs:$a)>;
4340 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4341           (SUQ_HEIGHT Int64Regs:$a)>;
4342 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4343           (SUQ_DEPTH Int64Regs:$a)>;
4344 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4345           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4346
4347
4348 //===- Handle Query -------------------------------------------------------===//
4349
4350 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type test: emits "istypep.samplerref $d, $a" and sets the predicate
// register $d (Int1Regs) from the Int64Regs operand $a. Unlike the query
// records above, the selection pattern is inline: it matches
// int_nvvm_istypep_sampler directly. Note the operand is printed bare, not
// as a bracketed address.
4351 def ISTYPEP_SAMPLER
4352   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4353               "istypep.samplerref \t$d, $a;",
4354               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// Handle-type test: emits "istypep.surfref $d, $a" and sets the predicate
// register $d (Int1Regs) from the Int64Regs operand $a. The inline pattern
// matches int_nvvm_istypep_surface directly.
4355 def ISTYPEP_SURFACE
4356   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4357               "istypep.surfref \t$d, $a;",
4358               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// Handle-type test: emits "istypep.texref $d, $a" and sets the predicate
// register $d (Int1Regs) from the Int64Regs operand $a. The inline pattern
// matches int_nvvm_istypep_texture directly.
4359 def ISTYPEP_TEXTURE
4360   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4361               "istypep.texref \t$d, $a;",
4362               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4363
4364 //===- Surface Stores -----------------------------------------------------===//
4365
4366 let IsSust = 1 in {
4367 // Unformatted
4368 // .clamp variant
// 1D "sust.b" store records, .clamp variant: scalar, v2 and v4 forms for
// each element width (no 64-bit v4 form). These produce no results
// ((outs) is empty). Inputs are $s (Int64Regs), the coordinate $x
// (Int32Regs), then the value components $r[, $g[, $b, $a]] to store;
// b8 and b16 values both come from Int16Regs.
// In the asm string the destination [$s, {$x}] is printed first and the
// value vector second -- the reverse of the suld operand order.
// The pattern lists are empty ([]), so these records carry no selection
// pattern of their own.
4369 def SUST_B_1D_B8_CLAMP
4370   : NVPTXInst<(outs),
4371               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4372               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4373               []>;
4374 def SUST_B_1D_B16_CLAMP
4375   : NVPTXInst<(outs),
4376               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4377               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4378               []>;
4379 def SUST_B_1D_B32_CLAMP
4380   : NVPTXInst<(outs),
4381               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4382               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4383               []>;
4384 def SUST_B_1D_B64_CLAMP
4385   : NVPTXInst<(outs),
4386               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4387               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4388               []>;
4389 def SUST_B_1D_V2B8_CLAMP
4390   : NVPTXInst<(outs),
4391               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4392               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4393               []>;
4394 def SUST_B_1D_V2B16_CLAMP
4395   : NVPTXInst<(outs),
4396               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4397               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4398               []>;
4399 def SUST_B_1D_V2B32_CLAMP
4400   : NVPTXInst<(outs),
4401               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4402               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4403               []>;
4404 def SUST_B_1D_V2B64_CLAMP
4405   : NVPTXInst<(outs),
4406               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4407               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4408               []>;
4409 def SUST_B_1D_V4B8_CLAMP
4410   : NVPTXInst<(outs),
4411               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4412                    Int16Regs:$b, Int16Regs:$a),
4413               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4414               []>;
4415 def SUST_B_1D_V4B16_CLAMP
4416   : NVPTXInst<(outs),
4417               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4418                    Int16Regs:$b, Int16Regs:$a),
4419               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4420               []>;
4421 def SUST_B_1D_V4B32_CLAMP
4422   : NVPTXInst<(outs),
4423               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4424                    Int32Regs:$b, Int32Regs:$a),
4425               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4426               []>;
4427
4428
// 1D-array "sust.b.a1d" store records, .clamp variant: scalar, v2 and v4
// forms for each element width (no 64-bit v4 form). These produce no
// results ((outs) is empty). Inputs are $s (Int64Regs), the Int32Regs pair
// $idx and $x printed as the {$idx, $x} coordinate vector, then the value
// components $r[, $g[, $b, $a]]; b8 and b16 values both come from Int16Regs.
// NOTE(review): $idx is presumably the array layer index (the suld records
// in this file name the corresponding operand $l) -- confirm.
// The v4 asm strings are split across two adjacent literals only to keep
// the source lines short.
// The pattern lists are empty ([]), so these records carry no selection
// pattern of their own.
4429 def SUST_B_1D_ARRAY_B8_CLAMP
4430   : NVPTXInst<(outs),
4431               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4432               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4433               []>;
4434 def SUST_B_1D_ARRAY_B16_CLAMP
4435   : NVPTXInst<(outs),
4436               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4437               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4438               []>;
4439 def SUST_B_1D_ARRAY_B32_CLAMP
4440   : NVPTXInst<(outs),
4441               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4442               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4443               []>;
4444 def SUST_B_1D_ARRAY_B64_CLAMP
4445   : NVPTXInst<(outs),
4446               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4447               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4448               []>;
4449 def SUST_B_1D_ARRAY_V2B8_CLAMP
4450   : NVPTXInst<(outs),
4451               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4452                    Int16Regs:$g),
4453               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4454               []>;
4455 def SUST_B_1D_ARRAY_V2B16_CLAMP
4456   : NVPTXInst<(outs),
4457               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4458                    Int16Regs:$g),
4459               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4460               []>;
4461 def SUST_B_1D_ARRAY_V2B32_CLAMP
4462   : NVPTXInst<(outs),
4463               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4464                    Int32Regs:$g),
4465               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4466               []>;
4467 def SUST_B_1D_ARRAY_V2B64_CLAMP
4468   : NVPTXInst<(outs),
4469               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4470                    Int64Regs:$g),
4471               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4472               []>;
4473 def SUST_B_1D_ARRAY_V4B8_CLAMP
4474   : NVPTXInst<(outs),
4475               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4476                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4477               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4478               "\\{$r, $g, $b, $a\\};",
4479               []>;
4480 def SUST_B_1D_ARRAY_V4B16_CLAMP
4481   : NVPTXInst<(outs),
4482               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4483                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4484              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4485              "\\{$r, $g, $b, $a\\};",
4486               []>;
4487 def SUST_B_1D_ARRAY_V4B32_CLAMP
4488   : NVPTXInst<(outs),
4489               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4490                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4491              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4492              "\\{$r, $g, $b, $a\\};",
4493               []>;
4494
4495
// `sust.b.2d.*.clamp` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x/$y (Int32Regs) are printed as the coordinate vector
// {$x, $y}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): `.clamp` is the PTX out-of-bounds mode that clamps the store
// to the nearest surface location - confirm against the PTX ISA.

// Scalar forms: data is {$r}.
4496 def SUST_B_2D_B8_CLAMP
4497   : NVPTXInst<(outs),
4498               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4499               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4500               []>;
4501 def SUST_B_2D_B16_CLAMP
4502   : NVPTXInst<(outs),
4503               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4504               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4505               []>;
4506 def SUST_B_2D_B32_CLAMP
4507   : NVPTXInst<(outs),
4508               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4509               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4510               []>;
4511 def SUST_B_2D_B64_CLAMP
4512   : NVPTXInst<(outs),
4513               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4514               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4515               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4516 def SUST_B_2D_V2B8_CLAMP
4517   : NVPTXInst<(outs),
4518               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4519                    Int16Regs:$g),
4520               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4521               []>;
4522 def SUST_B_2D_V2B16_CLAMP
4523   : NVPTXInst<(outs),
4524               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4525                    Int16Regs:$g),
4526               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4527               []>;
4528 def SUST_B_2D_V2B32_CLAMP
4529   : NVPTXInst<(outs),
4530               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4531                    Int32Regs:$g),
4532               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4533               []>;
4534 def SUST_B_2D_V2B64_CLAMP
4535   : NVPTXInst<(outs),
4536               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4537                    Int64Regs:$g),
4538               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4539               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}; the asm string is
// split across two adjacent string literals.
4540 def SUST_B_2D_V4B8_CLAMP
4541   : NVPTXInst<(outs),
4542               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4543                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4544               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4545               "\\{$r, $g, $b, $a\\};",
4546               []>;
4547 def SUST_B_2D_V4B16_CLAMP
4548   : NVPTXInst<(outs),
4549               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4550                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4551              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4552              "\\{$r, $g, $b, $a\\};",
4553               []>;
4554 def SUST_B_2D_V4B32_CLAMP
4555   : NVPTXInst<(outs),
4556               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4557                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4558              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4559              "\\{$r, $g, $b, $a\\};",
4560               []>;
4561
4562
// `sust.b.a2d.*.clamp` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $idx/$x/$y (Int32Regs) form the coordinate vector, and
// $r/$g/$b/$a are the data components being stored.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y is deliberately emitted twice even though only three coordinate
// operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// a2d and ignores the last element, so $y is reused as padding - confirm
// against the PTX ISA before "fixing" the duplicate.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.

// Scalar forms: data is {$r}.
4563 def SUST_B_2D_ARRAY_B8_CLAMP
4564   : NVPTXInst<(outs),
4565               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4566                    Int16Regs:$r),
4567               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4568               []>;
4569 def SUST_B_2D_ARRAY_B16_CLAMP
4570   : NVPTXInst<(outs),
4571               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4572                    Int16Regs:$r),
4573               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4574               []>;
4575 def SUST_B_2D_ARRAY_B32_CLAMP
4576   : NVPTXInst<(outs),
4577               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4578                    Int32Regs:$r),
4579               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4580               []>;
4581 def SUST_B_2D_ARRAY_B64_CLAMP
4582   : NVPTXInst<(outs),
4583               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4584                    Int64Regs:$r),
4585               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4586               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4587 def SUST_B_2D_ARRAY_V2B8_CLAMP
4588   : NVPTXInst<(outs),
4589               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4590                    Int16Regs:$r, Int16Regs:$g),
4591               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4592               "\\{$r, $g\\};",
4593               []>;
4594 def SUST_B_2D_ARRAY_V2B16_CLAMP
4595   : NVPTXInst<(outs),
4596               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4597                    Int16Regs:$r, Int16Regs:$g),
4598              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4599              "\\{$r, $g\\};",
4600               []>;
4601 def SUST_B_2D_ARRAY_V2B32_CLAMP
4602   : NVPTXInst<(outs),
4603               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4604                    Int32Regs:$r, Int32Regs:$g),
4605              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4606              "\\{$r, $g\\};",
4607               []>;
4608 def SUST_B_2D_ARRAY_V2B64_CLAMP
4609   : NVPTXInst<(outs),
4610               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4611                    Int64Regs:$r, Int64Regs:$g),
4612              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4613              "\\{$r, $g\\};",
4614               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
4615 def SUST_B_2D_ARRAY_V4B8_CLAMP
4616   : NVPTXInst<(outs),
4617               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4618                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4619       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4620       "\\{$r, $g, $b, $a\\};",
4621               []>;
4622 def SUST_B_2D_ARRAY_V4B16_CLAMP
4623   : NVPTXInst<(outs),
4624               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4625                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4626      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4627      "\\{$r, $g, $b, $a\\};",
4628               []>;
4629 def SUST_B_2D_ARRAY_V4B32_CLAMP
4630   : NVPTXInst<(outs),
4631               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4632                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4633      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4634      "\\{$r, $g, $b, $a\\};",
4635               []>;
4636
4637
// `sust.b.3d.*.clamp` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x/$y/$z (Int32Regs) form the coordinate vector, and
// $r/$g/$b/$a are the data components being stored.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z}:
// $z is deliberately emitted twice even though only three coordinate
// operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// 3d and ignores the last element, so $z is reused as padding - confirm
// against the PTX ISA before "fixing" the duplicate.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.

// Scalar forms: data is {$r}.
4638 def SUST_B_3D_B8_CLAMP
4639   : NVPTXInst<(outs),
4640               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4641                    Int16Regs:$r),
4642               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4643               []>;
4644 def SUST_B_3D_B16_CLAMP
4645   : NVPTXInst<(outs),
4646               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4647                    Int16Regs:$r),
4648               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4649               []>;
4650 def SUST_B_3D_B32_CLAMP
4651   : NVPTXInst<(outs),
4652               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4653                    Int32Regs:$r),
4654               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4655               []>;
4656 def SUST_B_3D_B64_CLAMP
4657   : NVPTXInst<(outs),
4658               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4659                    Int64Regs:$r),
4660               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4661               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4662 def SUST_B_3D_V2B8_CLAMP
4663   : NVPTXInst<(outs),
4664               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4665                    Int16Regs:$r, Int16Regs:$g),
4666               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4667               "\\{$r, $g\\};",
4668               []>;
4669 def SUST_B_3D_V2B16_CLAMP
4670   : NVPTXInst<(outs),
4671               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4672                    Int16Regs:$r, Int16Regs:$g),
4673               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4674               "\\{$r, $g\\};",
4675               []>;
4676 def SUST_B_3D_V2B32_CLAMP
4677   : NVPTXInst<(outs),
4678               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4679                    Int32Regs:$r, Int32Regs:$g),
4680               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4681               "\\{$r, $g\\};",
4682               []>;
4683 def SUST_B_3D_V2B64_CLAMP
4684   : NVPTXInst<(outs),
4685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4686                    Int64Regs:$r, Int64Regs:$g),
4687               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4688               "\\{$r, $g\\};",
4689               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
4690 def SUST_B_3D_V4B8_CLAMP
4691   : NVPTXInst<(outs),
4692               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4693                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4694          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4695          "\\{$r, $g, $b, $a\\};",
4696               []>;
4697 def SUST_B_3D_V4B16_CLAMP
4698   : NVPTXInst<(outs),
4699               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4700                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4701         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4702         "\\{$r, $g, $b, $a\\};",
4703               []>;
4704 def SUST_B_3D_V4B32_CLAMP
4705   : NVPTXInst<(outs),
4706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4707                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4708         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4709         "\\{$r, $g, $b, $a\\};",
4710               []>;
4711
4712
4713 // .trap variant
// `sust.b.1d.*.trap` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x (Int32Regs) is printed as the one-element coordinate
// vector {$x}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): `.trap` is the PTX out-of-bounds mode that raises an
// execution trap on an out-of-range store - confirm against the PTX ISA.

// Scalar forms: data is {$r}.
4714 def SUST_B_1D_B8_TRAP
4715   : NVPTXInst<(outs),
4716               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4717               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4718               []>;
4719 def SUST_B_1D_B16_TRAP
4720   : NVPTXInst<(outs),
4721               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4722               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4723               []>;
4724 def SUST_B_1D_B32_TRAP
4725   : NVPTXInst<(outs),
4726               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4727               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4728               []>;
4729 def SUST_B_1D_B64_TRAP
4730   : NVPTXInst<(outs),
4731               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4732               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
4733               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4734 def SUST_B_1D_V2B8_TRAP
4735   : NVPTXInst<(outs),
4736               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4737               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4738               []>;
4739 def SUST_B_1D_V2B16_TRAP
4740   : NVPTXInst<(outs),
4741               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4742               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4743               []>;
4744 def SUST_B_1D_V2B32_TRAP
4745   : NVPTXInst<(outs),
4746               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4747               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4748               []>;
4749 def SUST_B_1D_V2B64_TRAP
4750   : NVPTXInst<(outs),
4751               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4752               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4753               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
4754 def SUST_B_1D_V4B8_TRAP
4755   : NVPTXInst<(outs),
4756               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4757                    Int16Regs:$b, Int16Regs:$a),
4758               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4759               []>;
4760 def SUST_B_1D_V4B16_TRAP
4761   : NVPTXInst<(outs),
4762               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4763                    Int16Regs:$b, Int16Regs:$a),
4764               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4765               []>;
4766 def SUST_B_1D_V4B32_TRAP
4767   : NVPTXInst<(outs),
4768               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4769                    Int32Regs:$b, Int32Regs:$a),
4770               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4771               []>;
4772
4773
// `sust.b.a1d.*.trap` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $idx/$x (Int32Regs) are printed as the coordinate vector
// {$idx, $x}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): $idx is presumably the array-layer index, and `.trap` is the
// PTX out-of-bounds mode that raises an execution trap on an out-of-range
// store - confirm against the PTX ISA.

// Scalar forms: data is {$r}.
4774 def SUST_B_1D_ARRAY_B8_TRAP
4775   : NVPTXInst<(outs),
4776               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4777               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4778               []>;
4779 def SUST_B_1D_ARRAY_B16_TRAP
4780   : NVPTXInst<(outs),
4781               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4782               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4783               []>;
4784 def SUST_B_1D_ARRAY_B32_TRAP
4785   : NVPTXInst<(outs),
4786               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4787               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4788               []>;
4789 def SUST_B_1D_ARRAY_B64_TRAP
4790   : NVPTXInst<(outs),
4791               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4792               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4793               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4794 def SUST_B_1D_ARRAY_V2B8_TRAP
4795   : NVPTXInst<(outs),
4796               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4797                    Int16Regs:$g),
4798               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4799               []>;
4800 def SUST_B_1D_ARRAY_V2B16_TRAP
4801   : NVPTXInst<(outs),
4802               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4803                    Int16Regs:$g),
4804               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4805               []>;
4806 def SUST_B_1D_ARRAY_V2B32_TRAP
4807   : NVPTXInst<(outs),
4808               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4809                    Int32Regs:$g),
4810               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4811               []>;
4812 def SUST_B_1D_ARRAY_V2B64_TRAP
4813   : NVPTXInst<(outs),
4814               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4815                    Int64Regs:$g),
4816               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4817               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}; the asm string is
// split across two adjacent string literals.
4818 def SUST_B_1D_ARRAY_V4B8_TRAP
4819   : NVPTXInst<(outs),
4820               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4821                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4822               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
4823               "\\{$r, $g, $b, $a\\};",
4824               []>;
4825 def SUST_B_1D_ARRAY_V4B16_TRAP
4826   : NVPTXInst<(outs),
4827               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4828                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4829              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
4830              "\\{$r, $g, $b, $a\\};",
4831               []>;
4832 def SUST_B_1D_ARRAY_V4B32_TRAP
4833   : NVPTXInst<(outs),
4834               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4835                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4836              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
4837              "\\{$r, $g, $b, $a\\};",
4838               []>;
4839
4840
// `sust.b.2d.*.trap` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x/$y (Int32Regs) are printed as the coordinate vector
// {$x, $y}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): `.trap` is the PTX out-of-bounds mode that raises an
// execution trap on an out-of-range store - confirm against the PTX ISA.

// Scalar forms: data is {$r}.
4841 def SUST_B_2D_B8_TRAP
4842   : NVPTXInst<(outs),
4843               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4844               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4845               []>;
4846 def SUST_B_2D_B16_TRAP
4847   : NVPTXInst<(outs),
4848               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4849               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4850               []>;
4851 def SUST_B_2D_B32_TRAP
4852   : NVPTXInst<(outs),
4853               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4854               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4855               []>;
4856 def SUST_B_2D_B64_TRAP
4857   : NVPTXInst<(outs),
4858               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4859               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4860               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4861 def SUST_B_2D_V2B8_TRAP
4862   : NVPTXInst<(outs),
4863               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4864                    Int16Regs:$g),
4865               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4866               []>;
4867 def SUST_B_2D_V2B16_TRAP
4868   : NVPTXInst<(outs),
4869               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4870                    Int16Regs:$g),
4871               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4872               []>;
4873 def SUST_B_2D_V2B32_TRAP
4874   : NVPTXInst<(outs),
4875               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4876                    Int32Regs:$g),
4877               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4878               []>;
4879 def SUST_B_2D_V2B64_TRAP
4880   : NVPTXInst<(outs),
4881               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4882                    Int64Regs:$g),
4883               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4884               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}; the asm string is
// split across two adjacent string literals.
4885 def SUST_B_2D_V4B8_TRAP
4886   : NVPTXInst<(outs),
4887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4888                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4889               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
4890               "\\{$r, $g, $b, $a\\};",
4891               []>;
4892 def SUST_B_2D_V4B16_TRAP
4893   : NVPTXInst<(outs),
4894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4895                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4896              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
4897              "\\{$r, $g, $b, $a\\};",
4898               []>;
4899 def SUST_B_2D_V4B32_TRAP
4900   : NVPTXInst<(outs),
4901               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4902                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4903              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
4904              "\\{$r, $g, $b, $a\\};",
4905               []>;
4906
4907
// `sust.b.a2d.*.trap` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $idx/$x/$y (Int32Regs) form the coordinate vector, and
// $r/$g/$b/$a are the data components being stored.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y is deliberately emitted twice even though only three coordinate
// operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// a2d and ignores the last element, so $y is reused as padding - confirm
// against the PTX ISA before "fixing" the duplicate.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.

// Scalar forms: data is {$r}.
4908 def SUST_B_2D_ARRAY_B8_TRAP
4909   : NVPTXInst<(outs),
4910               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4911                    Int16Regs:$r),
4912               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4913               []>;
4914 def SUST_B_2D_ARRAY_B16_TRAP
4915   : NVPTXInst<(outs),
4916               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4917                    Int16Regs:$r),
4918               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4919               []>;
4920 def SUST_B_2D_ARRAY_B32_TRAP
4921   : NVPTXInst<(outs),
4922               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4923                    Int32Regs:$r),
4924               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4925               []>;
4926 def SUST_B_2D_ARRAY_B64_TRAP
4927   : NVPTXInst<(outs),
4928               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4929                    Int64Regs:$r),
4930               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4931               []>;
// Two-component (.v2) forms: data is {$r, $g}.
4932 def SUST_B_2D_ARRAY_V2B8_TRAP
4933   : NVPTXInst<(outs),
4934               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4935                    Int16Regs:$r, Int16Regs:$g),
4936               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4937               "\\{$r, $g\\};",
4938               []>;
4939 def SUST_B_2D_ARRAY_V2B16_TRAP
4940   : NVPTXInst<(outs),
4941               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4942                    Int16Regs:$r, Int16Regs:$g),
4943              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4944              "\\{$r, $g\\};",
4945               []>;
4946 def SUST_B_2D_ARRAY_V2B32_TRAP
4947   : NVPTXInst<(outs),
4948               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4949                    Int32Regs:$r, Int32Regs:$g),
4950              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4951              "\\{$r, $g\\};",
4952               []>;
4953 def SUST_B_2D_ARRAY_V2B64_TRAP
4954   : NVPTXInst<(outs),
4955               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4956                    Int64Regs:$r, Int64Regs:$g),
4957              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4958              "\\{$r, $g\\};",
4959               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
4960 def SUST_B_2D_ARRAY_V4B8_TRAP
4961   : NVPTXInst<(outs),
4962               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4963                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4964       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4965       "\\{$r, $g, $b, $a\\};",
4966               []>;
4967 def SUST_B_2D_ARRAY_V4B16_TRAP
4968   : NVPTXInst<(outs),
4969               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4970                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4971      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4972      "\\{$r, $g, $b, $a\\};",
4973               []>;
4974 def SUST_B_2D_ARRAY_V4B32_TRAP
4975   : NVPTXInst<(outs),
4976               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4977                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4978      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4979      "\\{$r, $g, $b, $a\\};",
4980               []>;
4981
4982
// `sust.b.3d.*.trap` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x/$y/$z (Int32Regs) form the coordinate vector, and
// $r/$g/$b/$a are the data components being stored.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z}:
// $z is deliberately emitted twice even though only three coordinate
// operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// 3d and ignores the last element, so $z is reused as padding - confirm
// against the PTX ISA before "fixing" the duplicate.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.

// Scalar forms: data is {$r}.
4983 def SUST_B_3D_B8_TRAP
4984   : NVPTXInst<(outs),
4985               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4986                    Int16Regs:$r),
4987               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4988               []>;
4989 def SUST_B_3D_B16_TRAP
4990   : NVPTXInst<(outs),
4991               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4992                    Int16Regs:$r),
4993               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4994               []>;
4995 def SUST_B_3D_B32_TRAP
4996   : NVPTXInst<(outs),
4997               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4998                    Int32Regs:$r),
4999               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5000               []>;
5001 def SUST_B_3D_B64_TRAP
5002   : NVPTXInst<(outs),
5003               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5004                    Int64Regs:$r),
5005               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5006               []>;
// Two-component (.v2) forms: data is {$r, $g}.
5007 def SUST_B_3D_V2B8_TRAP
5008   : NVPTXInst<(outs),
5009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5010                    Int16Regs:$r, Int16Regs:$g),
5011               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5012               "\\{$r, $g\\};",
5013               []>;
5014 def SUST_B_3D_V2B16_TRAP
5015   : NVPTXInst<(outs),
5016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5017                    Int16Regs:$r, Int16Regs:$g),
5018               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5019               "\\{$r, $g\\};",
5020               []>;
5021 def SUST_B_3D_V2B32_TRAP
5022   : NVPTXInst<(outs),
5023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5024                    Int32Regs:$r, Int32Regs:$g),
5025               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5026               "\\{$r, $g\\};",
5027               []>;
5028 def SUST_B_3D_V2B64_TRAP
5029   : NVPTXInst<(outs),
5030               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5031                    Int64Regs:$r, Int64Regs:$g),
5032               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5033               "\\{$r, $g\\};",
5034               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
5035 def SUST_B_3D_V4B8_TRAP
5036   : NVPTXInst<(outs),
5037               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5038                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5039          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5040          "\\{$r, $g, $b, $a\\};",
5041               []>;
5042 def SUST_B_3D_V4B16_TRAP
5043   : NVPTXInst<(outs),
5044               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5045                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5046         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5047         "\\{$r, $g, $b, $a\\};",
5048               []>;
5049 def SUST_B_3D_V4B32_TRAP
5050   : NVPTXInst<(outs),
5051               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5052                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5053         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5054         "\\{$r, $g, $b, $a\\};",
5055               []>;
5056
5057
5058 // .zero variant
// `sust.b.1d.*.zero` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $x (Int32Regs) is printed as the one-element coordinate
// vector {$x}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): `.zero` is the PTX out-of-bounds mode that drops stores to
// out-of-range coordinates - confirm against the PTX ISA.

// Scalar forms: data is {$r}.
5059 def SUST_B_1D_B8_ZERO
5060   : NVPTXInst<(outs),
5061               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5062               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5063               []>;
5064 def SUST_B_1D_B16_ZERO
5065   : NVPTXInst<(outs),
5066               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5067               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5068               []>;
5069 def SUST_B_1D_B32_ZERO
5070   : NVPTXInst<(outs),
5071               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5072               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5073               []>;
5074 def SUST_B_1D_B64_ZERO
5075   : NVPTXInst<(outs),
5076               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5077               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5078               []>;
// Two-component (.v2) forms: data is {$r, $g}.
5079 def SUST_B_1D_V2B8_ZERO
5080   : NVPTXInst<(outs),
5081               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5082               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5083               []>;
5084 def SUST_B_1D_V2B16_ZERO
5085   : NVPTXInst<(outs),
5086               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5087               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5088               []>;
5089 def SUST_B_1D_V2B32_ZERO
5090   : NVPTXInst<(outs),
5091               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5092               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5093               []>;
5094 def SUST_B_1D_V2B64_ZERO
5095   : NVPTXInst<(outs),
5096               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5097               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5098               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}.
5099 def SUST_B_1D_V4B8_ZERO
5100   : NVPTXInst<(outs),
5101               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5102                    Int16Regs:$b, Int16Regs:$a),
5103               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5104               []>;
5105 def SUST_B_1D_V4B16_ZERO
5106   : NVPTXInst<(outs),
5107               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5108                    Int16Regs:$b, Int16Regs:$a),
5109               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5110               []>;
5111 def SUST_B_1D_V4B32_ZERO
5112   : NVPTXInst<(outs),
5113               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5114                    Int32Regs:$b, Int32Regs:$a),
5115               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5116               []>;
5117
5118
// `sust.b.a1d.*.zero` surface-store records (scalar, .v2 and .v4 data).
// Every record has no outputs (`(outs)`) and an empty selection-pattern
// list (`[]`). Operands: $s (Int64Regs) is printed in the surface position
// of the address, $idx/$x (Int32Regs) are printed as the coordinate vector
// {$idx, $x}, and $r/$g/$b/$a are the data components being stored.
// The .b8 forms take their data in Int16Regs, exactly like the .b16 forms.
// `\\{` / `\\}` are escaped so that literal braces reach the printed PTX.
// No .v4.b64 form is defined in this group.
// NOTE(review): $idx is presumably the array-layer index, and `.zero` is the
// PTX out-of-bounds mode that drops stores to out-of-range coordinates -
// confirm against the PTX ISA.

// Scalar forms: data is {$r}.
5119 def SUST_B_1D_ARRAY_B8_ZERO
5120   : NVPTXInst<(outs),
5121               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5122               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5123               []>;
5124 def SUST_B_1D_ARRAY_B16_ZERO
5125   : NVPTXInst<(outs),
5126               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5127               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5128               []>;
5129 def SUST_B_1D_ARRAY_B32_ZERO
5130   : NVPTXInst<(outs),
5131               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5132               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5133               []>;
5134 def SUST_B_1D_ARRAY_B64_ZERO
5135   : NVPTXInst<(outs),
5136               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5137               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5138               []>;
// Two-component (.v2) forms: data is {$r, $g}.
5139 def SUST_B_1D_ARRAY_V2B8_ZERO
5140   : NVPTXInst<(outs),
5141               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5142                    Int16Regs:$g),
5143               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5144               []>;
5145 def SUST_B_1D_ARRAY_V2B16_ZERO
5146   : NVPTXInst<(outs),
5147               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5148                    Int16Regs:$g),
5149               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5150               []>;
5151 def SUST_B_1D_ARRAY_V2B32_ZERO
5152   : NVPTXInst<(outs),
5153               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5154                    Int32Regs:$g),
5155               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5156               []>;
5157 def SUST_B_1D_ARRAY_V2B64_ZERO
5158   : NVPTXInst<(outs),
5159               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5160                    Int64Regs:$g),
5161               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5162               []>;
// Four-component (.v4) forms: data is {$r, $g, $b, $a}; the asm string is
// split across two adjacent string literals.
5163 def SUST_B_1D_ARRAY_V4B8_ZERO
5164   : NVPTXInst<(outs),
5165               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5166                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5167               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5168               "\\{$r, $g, $b, $a\\};",
5169               []>;
5170 def SUST_B_1D_ARRAY_V4B16_ZERO
5171   : NVPTXInst<(outs),
5172               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5173                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5174              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5175              "\\{$r, $g, $b, $a\\};",
5176               []>;
5177 def SUST_B_1D_ARRAY_V4B32_ZERO
5178   : NVPTXInst<(outs),
5179               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5180                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5181              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5182              "\\{$r, $g, $b, $a\\};",
5183               []>;
5184
5185
// Unformatted 2D surface stores (asm mnemonic sust.b.2d.*.zero).
// Each record has no outputs; its inputs are the surface operand $s, the
// coordinate operands $x and $y, and one, two or four value operands
// ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// The pattern list is left empty ([]).
// NOTE(review): matching Pat records for the .zero forms are presumably
// defined elsewhere in this file -- confirm.
5186 def SUST_B_2D_B8_ZERO
5187   : NVPTXInst<(outs),
5188               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5189               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5190               []>;
5191 def SUST_B_2D_B16_ZERO
5192   : NVPTXInst<(outs),
5193               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5194               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5195               []>;
5196 def SUST_B_2D_B32_ZERO
5197   : NVPTXInst<(outs),
5198               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5199               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5200               []>;
5201 def SUST_B_2D_B64_ZERO
5202   : NVPTXInst<(outs),
5203               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5204               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5205               []>;
// Two-element (.v2) forms: values $r and $g.
5206 def SUST_B_2D_V2B8_ZERO
5207   : NVPTXInst<(outs),
5208               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5209                    Int16Regs:$g),
5210               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5211               []>;
5212 def SUST_B_2D_V2B16_ZERO
5213   : NVPTXInst<(outs),
5214               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5215                    Int16Regs:$g),
5216               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5217               []>;
5218 def SUST_B_2D_V2B32_ZERO
5219   : NVPTXInst<(outs),
5220               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5221                    Int32Regs:$g),
5222               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5223               []>;
5224 def SUST_B_2D_V2B64_ZERO
5225   : NVPTXInst<(outs),
5226               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5227                    Int64Regs:$g),
5228               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5229               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5230 def SUST_B_2D_V4B8_ZERO
5231   : NVPTXInst<(outs),
5232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5233                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5234               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5235               "\\{$r, $g, $b, $a\\};",
5236               []>;
5237 def SUST_B_2D_V4B16_ZERO
5238   : NVPTXInst<(outs),
5239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5240                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5241              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5242              "\\{$r, $g, $b, $a\\};",
5243               []>;
5244 def SUST_B_2D_V4B32_ZERO
5245   : NVPTXInst<(outs),
5246               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5247                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5248              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5249              "\\{$r, $g, $b, $a\\};",
5250               []>;
5251
5252
// Unformatted 2D-array surface stores (asm mnemonic sust.b.a2d.*.zero).
// Each record has no outputs; its inputs are the surface operand $s, the
// $idx, $x and $y operands printed as the coordinate vector, and one, two or
// four value operands ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// The asm string prints four coordinate entries and repeats $y in the last
// slot; there is no fourth coordinate input.
// NOTE(review): presumably PTX wants a 4-element coordinate vector here and
// ignores the last element -- confirm against the PTX ISA `sust` description.
// The pattern list is left empty ([]).
5253 def SUST_B_2D_ARRAY_B8_ZERO
5254   : NVPTXInst<(outs),
5255               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5256                    Int16Regs:$r),
5257               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5258               []>;
5259 def SUST_B_2D_ARRAY_B16_ZERO
5260   : NVPTXInst<(outs),
5261               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5262                    Int16Regs:$r),
5263               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5264               []>;
5265 def SUST_B_2D_ARRAY_B32_ZERO
5266   : NVPTXInst<(outs),
5267               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5268                    Int32Regs:$r),
5269               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5270               []>;
5271 def SUST_B_2D_ARRAY_B64_ZERO
5272   : NVPTXInst<(outs),
5273               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5274                    Int64Regs:$r),
5275               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5276               []>;
// Two-element (.v2) forms: values $r and $g.
5277 def SUST_B_2D_ARRAY_V2B8_ZERO
5278   : NVPTXInst<(outs),
5279               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5280                    Int16Regs:$r, Int16Regs:$g),
5281               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5282               "\\{$r, $g\\};",
5283               []>;
5284 def SUST_B_2D_ARRAY_V2B16_ZERO
5285   : NVPTXInst<(outs),
5286               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5287                    Int16Regs:$r, Int16Regs:$g),
5288              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5289              "\\{$r, $g\\};",
5290               []>;
5291 def SUST_B_2D_ARRAY_V2B32_ZERO
5292   : NVPTXInst<(outs),
5293               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5294                    Int32Regs:$r, Int32Regs:$g),
5295              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5296              "\\{$r, $g\\};",
5297               []>;
5298 def SUST_B_2D_ARRAY_V2B64_ZERO
5299   : NVPTXInst<(outs),
5300               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5301                    Int64Regs:$r, Int64Regs:$g),
5302              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5303              "\\{$r, $g\\};",
5304               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5305 def SUST_B_2D_ARRAY_V4B8_ZERO
5306   : NVPTXInst<(outs),
5307               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5308                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5309       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5310       "\\{$r, $g, $b, $a\\};",
5311               []>;
5312 def SUST_B_2D_ARRAY_V4B16_ZERO
5313   : NVPTXInst<(outs),
5314               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5315                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5316      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5317      "\\{$r, $g, $b, $a\\};",
5318               []>;
5319 def SUST_B_2D_ARRAY_V4B32_ZERO
5320   : NVPTXInst<(outs),
5321               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5322                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5323      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5324      "\\{$r, $g, $b, $a\\};",
5325               []>;
5326
5327
// Unformatted 3D surface stores (asm mnemonic sust.b.3d.*.zero).
// Each record has no outputs; its inputs are the surface operand $s, the
// coordinate operands $x, $y and $z, and one, two or four value operands
// ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// The asm string prints four coordinate entries and repeats $z in the last
// slot; there is no fourth coordinate input.
// NOTE(review): presumably PTX wants a 4-element coordinate vector here and
// ignores the last element -- confirm against the PTX ISA `sust` description.
// The pattern list is left empty ([]).
5328 def SUST_B_3D_B8_ZERO
5329   : NVPTXInst<(outs),
5330               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5331                    Int16Regs:$r),
5332               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5333               []>;
5334 def SUST_B_3D_B16_ZERO
5335   : NVPTXInst<(outs),
5336               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5337                    Int16Regs:$r),
5338               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5339               []>;
5340 def SUST_B_3D_B32_ZERO
5341   : NVPTXInst<(outs),
5342               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5343                    Int32Regs:$r),
5344               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5345               []>;
5346 def SUST_B_3D_B64_ZERO
5347   : NVPTXInst<(outs),
5348               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5349                    Int64Regs:$r),
5350               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5351               []>;
// Two-element (.v2) forms: values $r and $g.
5352 def SUST_B_3D_V2B8_ZERO
5353   : NVPTXInst<(outs),
5354               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5355                    Int16Regs:$r, Int16Regs:$g),
5356               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5357               "\\{$r, $g\\};",
5358               []>;
5359 def SUST_B_3D_V2B16_ZERO
5360   : NVPTXInst<(outs),
5361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5362                    Int16Regs:$r, Int16Regs:$g),
5363               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5364               "\\{$r, $g\\};",
5365               []>;
5366 def SUST_B_3D_V2B32_ZERO
5367   : NVPTXInst<(outs),
5368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5369                    Int32Regs:$r, Int32Regs:$g),
5370               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5371               "\\{$r, $g\\};",
5372               []>;
5373 def SUST_B_3D_V2B64_ZERO
5374   : NVPTXInst<(outs),
5375               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5376                    Int64Regs:$r, Int64Regs:$g),
5377               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5378               "\\{$r, $g\\};",
5379               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5380 def SUST_B_3D_V4B8_ZERO
5381   : NVPTXInst<(outs),
5382               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5383                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5384          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5385          "\\{$r, $g, $b, $a\\};",
5386               []>;
5387 def SUST_B_3D_V4B16_ZERO
5388   : NVPTXInst<(outs),
5389               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5390                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5391         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5392         "\\{$r, $g, $b, $a\\};",
5393               []>;
5394 def SUST_B_3D_V4B32_ZERO
5395   : NVPTXInst<(outs),
5396               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5397                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5398         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5399         "\\{$r, $g, $b, $a\\};",
5400               []>;
5401
5402
5403
5404 // Formatted
5405
// Formatted 1D surface stores (asm mnemonic sust.p.1d.*.trap).
// Each record has no outputs; its inputs are the surface operand $s, the
// coordinate operand $x, and one, two or four value operands
// ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// Only 8-, 16- and 32-bit element forms are defined in this group.
// The pattern list is left empty ([]).
// NOTE(review): matching Pat records for the sust.p forms are presumably
// defined elsewhere in this file -- confirm.
5406 def SUST_P_1D_B8_TRAP
5407   : NVPTXInst<(outs),
5408               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5409               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5410               []>;
5411 def SUST_P_1D_B16_TRAP
5412   : NVPTXInst<(outs),
5413               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5414               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5415               []>;
5416 def SUST_P_1D_B32_TRAP
5417   : NVPTXInst<(outs),
5418               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5419               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5420               []>;
// Two-element (.v2) forms: values $r and $g.
5421 def SUST_P_1D_V2B8_TRAP
5422   : NVPTXInst<(outs),
5423               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5424               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5425               []>;
5426 def SUST_P_1D_V2B16_TRAP
5427   : NVPTXInst<(outs),
5428               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5429               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5430               []>;
5431 def SUST_P_1D_V2B32_TRAP
5432   : NVPTXInst<(outs),
5433               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5434               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5435               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5436 def SUST_P_1D_V4B8_TRAP
5437   : NVPTXInst<(outs),
5438               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5439                    Int16Regs:$b, Int16Regs:$a),
5440               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5441               []>;
5442 def SUST_P_1D_V4B16_TRAP
5443   : NVPTXInst<(outs),
5444               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5445                    Int16Regs:$b, Int16Regs:$a),
5446               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5447               []>;
5448 def SUST_P_1D_V4B32_TRAP
5449   : NVPTXInst<(outs),
5450               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5451                    Int32Regs:$b, Int32Regs:$a),
5452               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5453               []>;
5454
5455
// Formatted 1D-array surface stores (asm mnemonic sust.p.a1d.*.trap).
// Each record has no outputs; its inputs are the surface operand $s, the
// $idx and $x operands printed as the coordinate vector, and one, two or four
// value operands ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// Only 8-, 16- and 32-bit element forms are defined in this group.
// The pattern list is left empty ([]).
5456 def SUST_P_1D_ARRAY_B8_TRAP
5457   : NVPTXInst<(outs),
5458               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5459               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5460               []>;
5461 def SUST_P_1D_ARRAY_B16_TRAP
5462   : NVPTXInst<(outs),
5463               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5464               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5465               []>;
5466 def SUST_P_1D_ARRAY_B32_TRAP
5467   : NVPTXInst<(outs),
5468               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5469               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5470               []>;
// Two-element (.v2) forms: values $r and $g.
5471 def SUST_P_1D_ARRAY_V2B8_TRAP
5472   : NVPTXInst<(outs),
5473               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5474                    Int16Regs:$g),
5475               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5476               []>;
5477 def SUST_P_1D_ARRAY_V2B16_TRAP
5478   : NVPTXInst<(outs),
5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5480                    Int16Regs:$g),
5481               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5482               []>;
5483 def SUST_P_1D_ARRAY_V2B32_TRAP
5484   : NVPTXInst<(outs),
5485               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5486                    Int32Regs:$g),
5487               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5488               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5489 def SUST_P_1D_ARRAY_V4B8_TRAP
5490   : NVPTXInst<(outs),
5491               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5492                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5493               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5494               "\\{$r, $g, $b, $a\\};",
5495               []>;
5496 def SUST_P_1D_ARRAY_V4B16_TRAP
5497   : NVPTXInst<(outs),
5498               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5499                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5500              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5501              "\\{$r, $g, $b, $a\\};",
5502               []>;
5503 def SUST_P_1D_ARRAY_V4B32_TRAP
5504   : NVPTXInst<(outs),
5505               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5506                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5507              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5508              "\\{$r, $g, $b, $a\\};",
5509               []>;
5510
5511
// Formatted 2D surface stores (asm mnemonic sust.p.2d.*.trap).
// Each record has no outputs; its inputs are the surface operand $s, the
// coordinate operands $x and $y, and one, two or four value operands
// ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// Only 8-, 16- and 32-bit element forms are defined in this group.
// The pattern list is left empty ([]).
5512 def SUST_P_2D_B8_TRAP
5513   : NVPTXInst<(outs),
5514               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5515               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5516               []>;
5517 def SUST_P_2D_B16_TRAP
5518   : NVPTXInst<(outs),
5519               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5520               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5521               []>;
5522 def SUST_P_2D_B32_TRAP
5523   : NVPTXInst<(outs),
5524               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5525               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5526               []>;
// Two-element (.v2) forms: values $r and $g.
5527 def SUST_P_2D_V2B8_TRAP
5528   : NVPTXInst<(outs),
5529               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5530                    Int16Regs:$g),
5531               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5532               []>;
5533 def SUST_P_2D_V2B16_TRAP
5534   : NVPTXInst<(outs),
5535               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5536                    Int16Regs:$g),
5537               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5538               []>;
5539 def SUST_P_2D_V2B32_TRAP
5540   : NVPTXInst<(outs),
5541               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5542                    Int32Regs:$g),
5543               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5544               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5545 def SUST_P_2D_V4B8_TRAP
5546   : NVPTXInst<(outs),
5547               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5548                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5549               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5550               "\\{$r, $g, $b, $a\\};",
5551               []>;
5552 def SUST_P_2D_V4B16_TRAP
5553   : NVPTXInst<(outs),
5554               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5555                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5556              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5557              "\\{$r, $g, $b, $a\\};",
5558               []>;
5559 def SUST_P_2D_V4B32_TRAP
5560   : NVPTXInst<(outs),
5561               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5562                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5563              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5564              "\\{$r, $g, $b, $a\\};",
5565               []>;
5566
5567
// Formatted 2D-array surface stores (asm mnemonic sust.p.a2d.*.trap).
// Each record has no outputs; its inputs are the surface operand $s, the
// $idx, $x and $y operands printed as the coordinate vector, and one, two or
// four value operands ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// Only 8-, 16- and 32-bit element forms are defined in this group.
// The asm string prints four coordinate entries and repeats $y in the last
// slot; there is no fourth coordinate input.
// NOTE(review): presumably PTX wants a 4-element coordinate vector here and
// ignores the last element -- confirm against the PTX ISA `sust` description.
// The pattern list is left empty ([]).
5568 def SUST_P_2D_ARRAY_B8_TRAP
5569   : NVPTXInst<(outs),
5570               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5571                    Int16Regs:$r),
5572               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5573               []>;
5574 def SUST_P_2D_ARRAY_B16_TRAP
5575   : NVPTXInst<(outs),
5576               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5577                    Int16Regs:$r),
5578               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5579               []>;
5580 def SUST_P_2D_ARRAY_B32_TRAP
5581   : NVPTXInst<(outs),
5582               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5583                    Int32Regs:$r),
5584               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5585               []>;
// Two-element (.v2) forms: values $r and $g.
5586 def SUST_P_2D_ARRAY_V2B8_TRAP
5587   : NVPTXInst<(outs),
5588               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5589                    Int16Regs:$r, Int16Regs:$g),
5590               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5591               "\\{$r, $g\\};",
5592               []>;
5593 def SUST_P_2D_ARRAY_V2B16_TRAP
5594   : NVPTXInst<(outs),
5595               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5596                    Int16Regs:$r, Int16Regs:$g),
5597              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5598              "\\{$r, $g\\};",
5599               []>;
5600 def SUST_P_2D_ARRAY_V2B32_TRAP
5601   : NVPTXInst<(outs),
5602               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5603                    Int32Regs:$r, Int32Regs:$g),
5604              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5605              "\\{$r, $g\\};",
5606               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5607 def SUST_P_2D_ARRAY_V4B8_TRAP
5608   : NVPTXInst<(outs),
5609               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5610                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5611       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5612       "\\{$r, $g, $b, $a\\};",
5613               []>;
5614 def SUST_P_2D_ARRAY_V4B16_TRAP
5615   : NVPTXInst<(outs),
5616               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5617                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5618      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5619      "\\{$r, $g, $b, $a\\};",
5620               []>;
5621 def SUST_P_2D_ARRAY_V4B32_TRAP
5622   : NVPTXInst<(outs),
5623               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5624                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5625      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5626      "\\{$r, $g, $b, $a\\};",
5627               []>;
5628
5629
// Formatted 3D surface stores (asm mnemonic sust.p.3d.*.trap).
// Each record has no outputs; its inputs are the surface operand $s, the
// coordinate operands $x, $y and $z, and one, two or four value operands
// ($r, $g, $b, $a). The 8-bit forms take Int16Regs values.
// Only 8-, 16- and 32-bit element forms are defined in this group.
// The asm string prints four coordinate entries and repeats $z in the last
// slot; there is no fourth coordinate input.
// NOTE(review): presumably PTX wants a 4-element coordinate vector here and
// ignores the last element -- confirm against the PTX ISA `sust` description.
// The pattern list is left empty ([]).
5630 def SUST_P_3D_B8_TRAP
5631   : NVPTXInst<(outs),
5632               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5633                    Int16Regs:$r),
5634               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5635               []>;
5636 def SUST_P_3D_B16_TRAP
5637   : NVPTXInst<(outs),
5638               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5639                    Int16Regs:$r),
5640               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5641               []>;
5642 def SUST_P_3D_B32_TRAP
5643   : NVPTXInst<(outs),
5644               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5645                    Int32Regs:$r),
5646               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5647               []>;
// Two-element (.v2) forms: values $r and $g.
5648 def SUST_P_3D_V2B8_TRAP
5649   : NVPTXInst<(outs),
5650               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5651                    Int16Regs:$r, Int16Regs:$g),
5652               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5653               "\\{$r, $g\\};",
5654               []>;
5655 def SUST_P_3D_V2B16_TRAP
5656   : NVPTXInst<(outs),
5657               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5658                    Int16Regs:$r, Int16Regs:$g),
5659               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5660               "\\{$r, $g\\};",
5661               []>;
5662 def SUST_P_3D_V2B32_TRAP
5663   : NVPTXInst<(outs),
5664               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5665                    Int32Regs:$r, Int32Regs:$g),
5666               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5667               "\\{$r, $g\\};",
5668               []>;
// Four-element (.v4) forms: values $r, $g, $b and $a.
5669 def SUST_P_3D_V4B8_TRAP
5670   : NVPTXInst<(outs),
5671               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5672                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5673          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5674          "\\{$r, $g, $b, $a\\};",
5675               []>;
5676 def SUST_P_3D_V4B16_TRAP
5677   : NVPTXInst<(outs),
5678               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5679                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5680         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5681         "\\{$r, $g, $b, $a\\};",
5682               []>;
5683 def SUST_P_3D_V4B32_TRAP
5684   : NVPTXInst<(outs),
5685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5686                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5687         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5688         "\\{$r, $g, $b, $a\\};",
5689               []>;
5690 }
5691
5692 // Surface store instruction patterns
5693 // The reason is unclear, but TableGen reports type errors when these patterns
5694 // are written inside the instruction definitions, so they are kept separate.
5695
5696 // .clamp variant
// Selection patterns for the 1D .clamp surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_<type>_clamp onto the
// SUST_B_1D_<TYPE>_CLAMP instruction, passing the same operands in the same
// order: $s, $x, then the value operand(s) $r [, $g [, $b, $a]].
// The i8 and i16 intrinsics both use Int16Regs value operands.
5697 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5698            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5699           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5700
5701 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5702            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5703           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5704
5705 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5706            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5707           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5708
5709 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5710            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5711           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5712
// Two-element forms.
5713 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5714            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5715           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5716            Int16Regs:$r, Int16Regs:$g)>;
5717
5718 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5719            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5720           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5721            Int16Regs:$r, Int16Regs:$g)>;
5722
5723 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5724            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5725           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5726            Int32Regs:$r, Int32Regs:$g)>;
5727
5728 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5729            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5730           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5731            Int64Regs:$r, Int64Regs:$g)>;
5732
// Four-element forms.
5733 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5734            Int64Regs:$s, Int32Regs:$x,
5735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5736           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5737            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5738
5739 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5740            Int64Regs:$s, Int32Regs:$x,
5741            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5742           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5743            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5744
5745 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5746            Int64Regs:$s, Int32Regs:$x,
5747            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5748           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5749            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5750
5751
5752
// Selection patterns for the 1D-array .clamp surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_array_<type>_clamp onto the
// SUST_B_1D_ARRAY_<TYPE>_CLAMP instruction, passing the same operands in the
// same order: $s, $l, $x, then the value operand(s) $r [, $g [, $b, $a]].
// NOTE(review): $l presumably binds to the operand the instruction
// definitions call $idx -- confirm against the SUST_B_1D_ARRAY_*_CLAMP defs.
5753 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5754            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5755           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5756            Int16Regs:$r)>;
5757
5758 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5759            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5760           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5761            Int16Regs:$r)>;
5762
5763 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5764            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5765           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5766            Int32Regs:$r)>;
5767
5768 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5769            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5770           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5771            Int64Regs:$r)>;
5772
// Two-element forms.
5773 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5774           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5775           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5776            Int16Regs:$r, Int16Regs:$g)>;
5777
5778 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5779           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5780           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5781            Int16Regs:$r, Int16Regs:$g)>;
5782
5783 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5784           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5785           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5786            Int32Regs:$r, Int32Regs:$g)>;
5787
5788 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5789           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5790           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5791            Int64Regs:$r, Int64Regs:$g)>;
5792
// Four-element forms.
5793 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5794            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5795            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5796           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5798
5799 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5800            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5801            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5802           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5803            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5804
5805 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5806            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5807            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5808           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5809            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5810
5811
5812
// Selection patterns for the 2D .clamp surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_<type>_clamp onto the
// SUST_B_2D_<TYPE>_CLAMP instruction, passing the same operands in the same
// order: $s, $x, $y, then the value operand(s) $r [, $g [, $b, $a]].
// The i8 and i16 intrinsics both use Int16Regs value operands.
5813 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5814            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5815           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5816            Int16Regs:$r)>;
5817
5818 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5819            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5820           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5821            Int16Regs:$r)>;
5822
5823 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5824            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5825           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5826            Int32Regs:$r)>;
5827
5828 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5829            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5830           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5831            Int64Regs:$r)>;
5832
// Two-element forms.
5833 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5834           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5835           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5836            Int16Regs:$r, Int16Regs:$g)>;
5837
5838 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5839           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5840           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5841            Int16Regs:$r, Int16Regs:$g)>;
5842
5843 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5844           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5845           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5846            Int32Regs:$r, Int32Regs:$g)>;
5847
5848 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5849           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5850           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5851            Int64Regs:$r, Int64Regs:$g)>;
5852
// Four-element forms.
5853 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5854            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5855            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5856           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5857            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5858
5859 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5860            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5861            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5862           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5863            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5864
5865 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5866            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5867            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5868           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5869            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5870
5871
5872
// Selection patterns: 2D-array surface stores, `_clamp` flavor.
// Each Pat maps an int_nvvm_sust_b_2d_array_<ty>_clamp intrinsic onto the
// SUST_B_2D_ARRAY_<TY>_CLAMP instruction, forwarding every operand unchanged
// and in the same order ($s, $l, $x, $y, then the stored values).
// Register classes as written below: $s is Int64Regs; $l, $x and $y are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the SUST_B_2D_ARRAY_* instruction definitions.
// Scalar element stores.
5873 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5874           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5875           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
5876            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5877            Int16Regs:$r)>;
5878
5879 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5880           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5881           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
5882            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5883            Int16Regs:$r)>;
5884
5885 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5886           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5887           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
5888            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5889            Int32Regs:$r)>;
5890
5891 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5892           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5893           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
5894            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5895            Int64Regs:$r)>;
5896
// Two-element (v2) stores: values in $r, $g.
5897 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5898            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5899            Int16Regs:$r, Int16Regs:$g),
5900           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
5901            Int32Regs:$x, Int32Regs:$y,
5902            Int16Regs:$r, Int16Regs:$g)>;
5903
5904 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5905            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5906            Int16Regs:$r, Int16Regs:$g),
5907           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
5908            Int32Regs:$x, Int32Regs:$y,
5909            Int16Regs:$r, Int16Regs:$g)>;
5910
5911 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5912            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5913            Int32Regs:$g),
5914           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5915            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5916
5917 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5918            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5919            Int64Regs:$g),
5920           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
5921            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5922
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
5923 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5924            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5925            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5926           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
5927            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5928            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5929
5930 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5931            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5932            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5933           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
5934            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5935            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5936
5937 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5938            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5939            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5940           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5941            Int32Regs:$x, Int32Regs:$y,
5942            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5943
5944
5945
// Selection patterns: 3D surface stores, `_clamp` flavor.
// Each Pat maps an int_nvvm_sust_b_3d_<ty>_clamp intrinsic onto the
// SUST_B_3D_<TY>_CLAMP instruction, forwarding every operand unchanged and in
// the same order ($s, $x, $y, $z, then the stored values).
// Register classes as written below: $s is Int64Regs; $x, $y and $z are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_B_3D_* instruction definitions.
// Scalar element stores.
5946 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5947            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5948            Int16Regs:$r),
5949           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
5950            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5951            Int16Regs:$r)>;
5952
5953 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5954            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5955            Int16Regs:$r),
5956           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
5957            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5958            Int16Regs:$r)>;
5959
5960 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5961            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5962            Int32Regs:$r),
5963           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
5964            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5965            Int32Regs:$r)>;
5966
5967 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5968            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5969            Int64Regs:$r),
5970           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
5971            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5972            Int64Regs:$r)>;
5973
// Two-element (v2) stores: values in $r, $g.
5974 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5975            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5976            Int16Regs:$r, Int16Regs:$g),
5977           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
5978            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5979            Int16Regs:$r, Int16Regs:$g)>;
5980
5981 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5982            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5983            Int16Regs:$r, Int16Regs:$g),
5984           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
5985            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5986            Int16Regs:$r, Int16Regs:$g)>;
5987
5988 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5989            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5990            Int32Regs:$r, Int32Regs:$g),
5991           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
5992            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5993            Int32Regs:$r, Int32Regs:$g)>;
5994
5995 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5996            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5997            Int64Regs:$r, Int64Regs:$g),
5998           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
5999            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6000            Int64Regs:$r, Int64Regs:$g)>;
6001
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6002 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6003            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6004            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6005           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6006            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6007            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6008
6009 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6010            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6011            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6012           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6013            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6014            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6015
6016 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6017            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6018            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6019           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6020            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6021            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6022
6023
6024 // .trap variant
// Selection patterns: 1D surface stores, `_trap` flavor.
// Each Pat maps an int_nvvm_sust_b_1d_<ty>_trap intrinsic onto the
// SUST_B_1D_<TY>_TRAP instruction, forwarding every operand unchanged and in
// the same order ($s, $x, then the stored values).
// Register classes as written below: $s is Int64Regs; $x is Int32Regs;
// stored values use Int16Regs for 8- and 16-bit elements, Int32Regs for
// 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the SUST_B_1D_* instruction definitions.
// Scalar element stores.
6025 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6026            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6027           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6028
6029 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6030            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6031           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6032
6033 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6034            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6035           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6036
6037 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6038            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6039           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6040
// Two-element (v2) stores: values in $r, $g.
6041 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6042            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6043           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6044            Int16Regs:$r, Int16Regs:$g)>;
6045
6046 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6047            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6048           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6049            Int16Regs:$r, Int16Regs:$g)>;
6050
6051 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6052            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6053           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6054            Int32Regs:$r, Int32Regs:$g)>;
6055
6056 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6057            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6058           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6059            Int64Regs:$r, Int64Regs:$g)>;
6060
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6061 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6062            Int64Regs:$s, Int32Regs:$x,
6063            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6064           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6065            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6066
6067 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6068            Int64Regs:$s, Int32Regs:$x,
6069            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6070           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6071            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6072
6073 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6074            Int64Regs:$s, Int32Regs:$x,
6075            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6076           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6077            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6078
6079
6080
// Selection patterns: 1D-array surface stores, `_trap` flavor.
// Each Pat maps an int_nvvm_sust_b_1d_array_<ty>_trap intrinsic onto the
// SUST_B_1D_ARRAY_<TY>_TRAP instruction, forwarding every operand unchanged
// and in the same order ($s, $l, $x, then the stored values).
// Register classes as written below: $s is Int64Regs; $l and $x are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the SUST_B_1D_ARRAY_* instruction definitions.
// Scalar element stores.
6081 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6082            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6083           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6084            Int16Regs:$r)>;
6085
6086 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6087            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6088           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6089            Int16Regs:$r)>;
6090
6091 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6092            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6093           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6094            Int32Regs:$r)>;
6095
6096 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6097            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6098           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6099            Int64Regs:$r)>;
6100
// Two-element (v2) stores: values in $r, $g.
6101 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6102           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6103           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6104            Int16Regs:$r, Int16Regs:$g)>;
6105
6106 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6107           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6108           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109            Int16Regs:$r, Int16Regs:$g)>;
6110
6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6112           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6113           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6114            Int32Regs:$r, Int32Regs:$g)>;
6115
6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6117           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6118           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6119            Int64Regs:$r, Int64Regs:$g)>;
6120
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6121 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6122            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6123            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6124           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6125            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6126
6127 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6128            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6129            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6130           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6131            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6132
6133 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6134            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6135            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6136           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6137            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6138
6139
6140
// Selection patterns: 2D surface stores, `_trap` flavor.
// Each Pat maps an int_nvvm_sust_b_2d_<ty>_trap intrinsic onto the
// SUST_B_2D_<TY>_TRAP instruction, forwarding every operand unchanged and in
// the same order ($s, $x, $y, then the stored values).
// Register classes as written below: $s is Int64Regs; $x and $y are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_B_2D_* instruction definitions.
// Scalar element stores.
6141 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6142            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6143           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6144            Int16Regs:$r)>;
6145
6146 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6147            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6148           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6149            Int16Regs:$r)>;
6150
6151 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6153           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6154            Int32Regs:$r)>;
6155
6156 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6157            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6158           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6159            Int64Regs:$r)>;
6160
// Two-element (v2) stores: values in $r, $g.
6161 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6162           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6163           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6164            Int16Regs:$r, Int16Regs:$g)>;
6165
6166 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6167           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6168           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6169            Int16Regs:$r, Int16Regs:$g)>;
6170
6171 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6172           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6173           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6174            Int32Regs:$r, Int32Regs:$g)>;
6175
6176 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6177           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6178           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6179            Int64Regs:$r, Int64Regs:$g)>;
6180
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6181 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6182            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6183            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6184           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6185            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6186
6187 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6188            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6189            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6190           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6191            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6192
6193 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6194            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6195            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6196           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6197            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6198
6199
6200
// Selection patterns: 2D-array surface stores, `_trap` flavor.
// Each Pat maps an int_nvvm_sust_b_2d_array_<ty>_trap intrinsic onto the
// SUST_B_2D_ARRAY_<TY>_TRAP instruction, forwarding every operand unchanged
// and in the same order ($s, $l, $x, $y, then the stored values).
// Register classes as written below: $s is Int64Regs; $l, $x and $y are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the SUST_B_2D_ARRAY_* instruction definitions.
// Scalar element stores.
6201 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6202           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6203           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6204            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6205            Int16Regs:$r)>;
6206
6207 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6208           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6209           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6210            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6211            Int16Regs:$r)>;
6212
6213 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6214           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6215           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6216            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6217            Int32Regs:$r)>;
6218
6219 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6220           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6221           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6222            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6223            Int64Regs:$r)>;
6224
// Two-element (v2) stores: values in $r, $g.
6225 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6226            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6227            Int16Regs:$r, Int16Regs:$g),
6228           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6229            Int32Regs:$x, Int32Regs:$y,
6230            Int16Regs:$r, Int16Regs:$g)>;
6231
6232 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6233            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6234            Int16Regs:$r, Int16Regs:$g),
6235           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6236            Int32Regs:$x, Int32Regs:$y,
6237            Int16Regs:$r, Int16Regs:$g)>;
6238
6239 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6240            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6241            Int32Regs:$g),
6242           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6243            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6244
6245 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6246            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6247            Int64Regs:$g),
6248           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6249            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6250
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6251 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6252            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6253            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6254           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6255            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6256            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6257
6258 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6259            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6260            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6261           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6262            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6263            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6264
6265 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6266            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6267            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6268           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6269            Int32Regs:$x, Int32Regs:$y,
6270            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6271
6272
6273
// Selection patterns: 3D surface stores, `_trap` flavor.
// Each Pat maps an int_nvvm_sust_b_3d_<ty>_trap intrinsic onto the
// SUST_B_3D_<TY>_TRAP instruction, forwarding every operand unchanged and in
// the same order ($s, $x, $y, $z, then the stored values).
// Register classes as written below: $s is Int64Regs; $x, $y and $z are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_B_3D_* instruction definitions.
// Scalar element stores.
6274 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6275            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6276            Int16Regs:$r),
6277           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6278            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6279            Int16Regs:$r)>;
6280
6281 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6282            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6283            Int16Regs:$r),
6284           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6285            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6286            Int16Regs:$r)>;
6287
6288 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6289            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6290            Int32Regs:$r),
6291           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6292            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6293            Int32Regs:$r)>;
6294
6295 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6296            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6297            Int64Regs:$r),
6298           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6299            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6300            Int64Regs:$r)>;
6301
// Two-element (v2) stores: values in $r, $g.
6302 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6303            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6304            Int16Regs:$r, Int16Regs:$g),
6305           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6306            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6307            Int16Regs:$r, Int16Regs:$g)>;
6308
6309 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6310            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6311            Int16Regs:$r, Int16Regs:$g),
6312           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6313            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314            Int16Regs:$r, Int16Regs:$g)>;
6315
6316 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6317            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6318            Int32Regs:$r, Int32Regs:$g),
6319           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6320            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321            Int32Regs:$r, Int32Regs:$g)>;
6322
6323 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6324            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6325            Int64Regs:$r, Int64Regs:$g),
6326           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6327            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6328            Int64Regs:$r, Int64Regs:$g)>;
6329
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6330 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6331            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6332            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6333           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6334            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6335            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6336
6337 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6338            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6339            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6340           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6341            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6342            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6343
6344 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6345            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6346            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6347           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6348            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6349            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6350
6351
6352 // .zero variant
// Selection patterns: 1D surface stores, `_zero` flavor.
// Each Pat maps an int_nvvm_sust_b_1d_<ty>_zero intrinsic onto the
// SUST_B_1D_<TY>_ZERO instruction, forwarding every operand unchanged and in
// the same order ($s, $x, then the stored values).
// Register classes as written below: $s is Int64Regs; $x is Int32Regs;
// stored values use Int16Regs for 8- and 16-bit elements, Int32Regs for
// 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the SUST_B_1D_* instruction definitions.
// Scalar element stores.
6353 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6354            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6355           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6356
6357 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6358            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6359           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6360
6361 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6362            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6363           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6364
6365 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6366            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6367           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6368
// Two-element (v2) stores: values in $r, $g.
6369 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6370            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6371           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6372            Int16Regs:$r, Int16Regs:$g)>;
6373
6374 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6375            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6376           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6377            Int16Regs:$r, Int16Regs:$g)>;
6378
6379 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6380            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6381           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6382            Int32Regs:$r, Int32Regs:$g)>;
6383
6384 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6385            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6386           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6387            Int64Regs:$r, Int64Regs:$g)>;
6388
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6389 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6390            Int64Regs:$s, Int32Regs:$x,
6391            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6392           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6393            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6394
6395 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6396            Int64Regs:$s, Int32Regs:$x,
6397            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6398           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6399            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6400
6401 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6402            Int64Regs:$s, Int32Regs:$x,
6403            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6404           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6405            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6406
6407
6408
// Selection patterns: 1D-array surface stores, `_zero` flavor.
// Each Pat maps an int_nvvm_sust_b_1d_array_<ty>_zero intrinsic onto the
// SUST_B_1D_ARRAY_<TY>_ZERO instruction, forwarding every operand unchanged
// and in the same order ($s, $l, $x, then the stored values).
// Register classes as written below: $s is Int64Regs; $l and $x are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the SUST_B_1D_ARRAY_* instruction definitions.
// Scalar element stores.
6409 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6410            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6411           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6412            Int16Regs:$r)>;
6413
6414 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6415            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6416           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6417            Int16Regs:$r)>;
6418
6419 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6420            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6421           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6422            Int32Regs:$r)>;
6423
6424 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6425            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6426           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6427            Int64Regs:$r)>;
6428
// Two-element (v2) stores: values in $r, $g.
6429 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6430           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6431           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6432            Int16Regs:$r, Int16Regs:$g)>;
6433
6434 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6435           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6436           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6437            Int16Regs:$r, Int16Regs:$g)>;
6438
6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6440           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6441           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6442            Int32Regs:$r, Int32Regs:$g)>;
6443
6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6445           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6446           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6447            Int64Regs:$r, Int64Regs:$g)>;
6448
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6449 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6450            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6451            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6452           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6453            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6454
6455 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6456            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6458           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6459            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6460
6461 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6462            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6463            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6464           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6465            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6466
6467
6468
// Selection patterns: 2D surface stores, `_zero` flavor.
// Each Pat maps an int_nvvm_sust_b_2d_<ty>_zero intrinsic onto the
// SUST_B_2D_<TY>_ZERO instruction, forwarding every operand unchanged and in
// the same order ($s, $x, $y, then the stored values).
// Register classes as written below: $s is Int64Regs; $x and $y are
// Int32Regs; stored values use Int16Regs for 8- and 16-bit elements,
// Int32Regs for 32-bit and Int64Regs for 64-bit elements.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_B_2D_* instruction definitions.
// Scalar element stores.
6469 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6470            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6471           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6472            Int16Regs:$r)>;
6473
6474 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6475            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6476           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6477            Int16Regs:$r)>;
6478
6479 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6480            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6481           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6482            Int32Regs:$r)>;
6483
6484 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6485            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6486           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6487            Int64Regs:$r)>;
6488
// Two-element (v2) stores: values in $r, $g.
6489 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6490           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6491           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6492            Int16Regs:$r, Int16Regs:$g)>;
6493
6494 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6495           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6496           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497            Int16Regs:$r, Int16Regs:$g)>;
6498
6499 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6500           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6501           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6502            Int32Regs:$r, Int32Regs:$g)>;
6503
6504 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6505           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6506           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6507            Int64Regs:$r, Int64Regs:$g)>;
6508
// Four-element (v4) stores: values in $r, $g, $b, $a (8/16/32-bit only here).
6509 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6510            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6511            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6512           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6513            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6514
6515 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6516            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6518           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6519            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6520
6521 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6522            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6523            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6524           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6525            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6526
6527
6528
// Selection patterns for the int_nvvm_sust_b_2d_array_<type>_zero intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_B_2D_ARRAY_<type>_ZERO instruction, forwarding every operand unchanged
// and in the same order.
// Operand layout: $s (Int64Regs), $l, $x and $y (Int32Regs), followed by one,
// two or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs.
// NOTE(review): presumably $l is the array layer index -- confirm against the
// intrinsic definitions.
6529 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6530           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6531           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6532            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6533            Int16Regs:$r)>;
6534
6535 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6536           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6537           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6538            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6539            Int16Regs:$r)>;
6540
6541 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6542           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6543           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6544            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6545            Int32Regs:$r)>;
6546
6547 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6548           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6549           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6550            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6551            Int64Regs:$r)>;
6552
6553 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6554            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6555            Int16Regs:$r, Int16Regs:$g),
6556           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6557            Int32Regs:$x, Int32Regs:$y,
6558            Int16Regs:$r, Int16Regs:$g)>;
6559
6560 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6561            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6562            Int16Regs:$r, Int16Regs:$g),
6563           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6564            Int32Regs:$x, Int32Regs:$y,
6565            Int16Regs:$r, Int16Regs:$g)>;
6566
6567 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6568            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6569            Int32Regs:$g),
6570           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6571            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6572
6573 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6574            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6575            Int64Regs:$g),
6576           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6577            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6578
6579 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6580            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6581            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6582           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6583            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6584            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6585
6586 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6587            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6588            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6589           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6590            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6591            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6592
6593 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6594            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6595            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6596           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6597            Int32Regs:$x, Int32Regs:$y,
6598            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6599
6600
6601
// Selection patterns for the int_nvvm_sust_b_3d_<type>_zero intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_B_3D_<type>_ZERO instruction, forwarding every operand unchanged and
// in the same order.
// Operand layout: $s (Int64Regs), $x, $y and $z (Int32Regs), followed by one,
// two or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs.
6602 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6603            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6604            Int16Regs:$r),
6605           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6606            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6607            Int16Regs:$r)>;
6608
6609 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6610            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6611            Int16Regs:$r),
6612           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6613            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6614            Int16Regs:$r)>;
6615
6616 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6617            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6618            Int32Regs:$r),
6619           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6620            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6621            Int32Regs:$r)>;
6622
6623 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6624            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6625            Int64Regs:$r),
6626           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6627            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6628            Int64Regs:$r)>;
6629
6630 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6631            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632            Int16Regs:$r, Int16Regs:$g),
6633           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6634            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635            Int16Regs:$r, Int16Regs:$g)>;
6636
6637 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6638            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639            Int16Regs:$r, Int16Regs:$g),
6640           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6641            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642            Int16Regs:$r, Int16Regs:$g)>;
6643
6644 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6645            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646            Int32Regs:$r, Int32Regs:$g),
6647           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6648            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649            Int32Regs:$r, Int32Regs:$g)>;
6650
6651 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6652            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6653            Int64Regs:$r, Int64Regs:$g),
6654           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6655            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6656            Int64Regs:$r, Int64Regs:$g)>;
6657
6658 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6659            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6660            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6661           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6662            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6663            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6664
6665 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6666            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6667            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6668           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6669            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6670            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6671
6672 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6673            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6674            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6675           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6676            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6677            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6678
6679
6680
6681
// Selection patterns for the int_nvvm_sust_p_1d_<type>_trap intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_P_1D_<type>_TRAP instruction, forwarding every operand unchanged and
// in the same order.
// Operand layout: $s (Int64Regs), $x (Int32Regs), followed by one, two or
// four value registers ($r, $g, $b, $a). The i8 and i16 variants both carry
// their values in Int16Regs. This group covers 8-, 16- and 32-bit element
// types only; it contains no 64-bit variants.
6682 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6683            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6684           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6685
6686 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6687            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6688           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6689
6690 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6691            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6692           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6693
6694 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6695            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6696           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6697            Int16Regs:$r, Int16Regs:$g)>;
6698
6699 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6700            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6701           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6702            Int16Regs:$r, Int16Regs:$g)>;
6703
6704 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6705            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6706           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6707            Int32Regs:$r, Int32Regs:$g)>;
6708
6709 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6710            Int64Regs:$s, Int32Regs:$x,
6711            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6712           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6713            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6714
6715 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6716            Int64Regs:$s, Int32Regs:$x,
6717            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6718           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6719            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6720
6721 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6722            Int64Regs:$s, Int32Regs:$x,
6723            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6724           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6725            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6726
6727
6728
// Selection patterns for the int_nvvm_sust_p_1d_array_<type>_trap intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_P_1D_ARRAY_<type>_TRAP instruction, forwarding every operand unchanged
// and in the same order.
// Operand layout: $s (Int64Regs), $l and $x (Int32Regs), followed by one, two
// or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs. No 64-bit variants appear in this group.
6729 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6730            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6731           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6732            Int16Regs:$r)>;
6733
6734 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6735            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6736           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6737            Int16Regs:$r)>;
6738
6739 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6740            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6741           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6742            Int32Regs:$r)>;
6743
6744 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6745           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6746           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6747            Int16Regs:$r, Int16Regs:$g)>;
6748
6749 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6750           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6751           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6752            Int16Regs:$r, Int16Regs:$g)>;
6753
6754 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6755           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6756           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757            Int32Regs:$r, Int32Regs:$g)>;
6758
6759 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6760            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6761            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6762           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6763            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6764
6765 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6766            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6767            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6768           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6769            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6770
6771 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6772            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6773            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6774           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6775            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6776
6777
6778
// Selection patterns for the int_nvvm_sust_p_2d_<type>_trap intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_P_2D_<type>_TRAP instruction, forwarding every operand unchanged and
// in the same order.
// Operand layout: $s (Int64Regs), $x and $y (Int32Regs), followed by one, two
// or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs. No 64-bit variants appear in this group.
6779 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6780            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6781           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6782            Int16Regs:$r)>;
6783
6784 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6785            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6786           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6787            Int16Regs:$r)>;
6788
6789 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6790            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6791           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6792            Int32Regs:$r)>;
6793
6794 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6795           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6796           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6797            Int16Regs:$r, Int16Regs:$g)>;
6798
6799 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6800           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6801           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6802            Int16Regs:$r, Int16Regs:$g)>;
6803
6804 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6805           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6806           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6807            Int32Regs:$r, Int32Regs:$g)>;
6808
6809 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6810            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6811            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6812           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6813            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814
6815 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6816            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6818           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6819            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6820
6821 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6822            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6823            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6824           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6826
6827
6828
// Selection patterns for the int_nvvm_sust_p_2d_array_<type>_trap intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_P_2D_ARRAY_<type>_TRAP instruction, forwarding every operand unchanged
// and in the same order.
// Operand layout: $s (Int64Regs), $l, $x and $y (Int32Regs), followed by one,
// two or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs. No 64-bit variants appear in this group.
6829 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6830           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
6832            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6833            Int16Regs:$r)>;
6834
6835 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6836           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6837           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
6838            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6839            Int16Regs:$r)>;
6840
6841 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6842           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6843           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
6844            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6845            Int32Regs:$r)>;
6846
6847 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6848            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6849            Int16Regs:$r, Int16Regs:$g),
6850           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6851            Int32Regs:$x, Int32Regs:$y,
6852            Int16Regs:$r, Int16Regs:$g)>;
6853
6854 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6855            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6856            Int16Regs:$r, Int16Regs:$g),
6857           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6858            Int32Regs:$x, Int32Regs:$y,
6859            Int16Regs:$r, Int16Regs:$g)>;
6860
6861 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6862            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6863            Int32Regs:$g),
6864           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6865            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6866
6867 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6868            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6869            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6870           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6871            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6872            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6873
6874 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6875            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6876            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6877           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6878            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6879            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6880
6881 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6882            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6883            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6884           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6885            Int32Regs:$x, Int32Regs:$y,
6886            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6887
6888
6889
// Selection patterns for the int_nvvm_sust_p_3d_<type>_trap intrinsics.
// Each pattern rewrites the intrinsic call to the same-named
// SUST_P_3D_<type>_TRAP instruction, forwarding every operand unchanged and
// in the same order.
// Operand layout: $s (Int64Regs), $x, $y and $z (Int32Regs), followed by one,
// two or four value registers ($r, $g, $b, $a). The i8 and i16 variants both
// carry their values in Int16Regs. No 64-bit variants appear in this group.
6890 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6891            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6892            Int16Regs:$r),
6893           (SUST_P_3D_B8_TRAP Int64Regs:$s,
6894            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6895            Int16Regs:$r)>;
6896
6897 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6898            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6899            Int16Regs:$r),
6900           (SUST_P_3D_B16_TRAP Int64Regs:$s,
6901            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6902            Int16Regs:$r)>;
6903
6904 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6905            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6906            Int32Regs:$r),
6907           (SUST_P_3D_B32_TRAP Int64Regs:$s,
6908            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6909            Int32Regs:$r)>;
6910
6911 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6912            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6913            Int16Regs:$r, Int16Regs:$g),
6914           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
6915            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6916            Int16Regs:$r, Int16Regs:$g)>;
6917
6918 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6919            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6920            Int16Regs:$r, Int16Regs:$g),
6921           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
6922            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6923            Int16Regs:$r, Int16Regs:$g)>;
6924
6925 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6926            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6927            Int32Regs:$r, Int32Regs:$g),
6928           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
6929            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6930            Int32Regs:$r, Int32Regs:$g)>;
6931
6932 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6933            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6934            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6935           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
6936            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6937            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6938
6939 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6940            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6941            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6942           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
6943            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6944            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6945
6946 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6947            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6948            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6949           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
6950            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6951            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6952
6953
6954
6955 //===-- Old PTX Back-end Intrinsics ---------------------------------------===//
6956
6957 // These intrinsics are handled to retain compatibility with the old backend.
6958
6959 // PTX Special Purpose Register Accessor Intrinsics
6960
// Instruction class for reading a special register into a 64-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             assembly string, giving "mov.u64\t$d, %<regname>;".
//   intop   - intrinsic this instruction is selected for; it is matched with
//             no arguments and its result is placed in Int64Regs:$d.
// The instruction has one output ($d) and no inputs.
6961 class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
6962   : NVPTXInst<(outs Int64Regs:$d), (ins),
6963               !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
6964               [(set Int64Regs:$d, (intop))]>;
6965
// Instruction class for reading a special register into a 32-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             assembly string, giving "mov.u32\t$d, %<regname>;".
//   intop   - intrinsic this instruction is selected for; it is matched with
//             no arguments and its result is placed in Int32Regs:$d.
// The instruction has one output ($d) and no inputs.
6966 class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
6967   : NVPTXInst<(outs Int32Regs:$d), (ins),
6968               !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
6969               [(set Int32Regs:$d, (intop))]>;
6970
6971 // TODO Add read vector-version of special registers
6972
// 32-bit reads of %tid.x / %tid.y / %tid.z / %tid.w, one instruction per
// component, each selected for the matching int_ptx_read_tid_* intrinsic.
// NOTE(review): presumably the thread index within the block -- confirm
// against the PTX special-register documentation.
6973 def PTX_READ_TID_X   : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
6974                                                      int_ptx_read_tid_x>;
6975 def PTX_READ_TID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
6976                                                      int_ptx_read_tid_y>;
6977 def PTX_READ_TID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
6978                                                      int_ptx_read_tid_z>;
6979 def PTX_READ_TID_W   : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
6980                                                      int_ptx_read_tid_w>;
6981
// 32-bit reads of %ntid.x / %ntid.y / %ntid.z / %ntid.w, one instruction per
// component, each selected for the matching int_ptx_read_ntid_* intrinsic.
// NOTE(review): presumably the block dimensions -- confirm against the PTX
// special-register documentation.
6982 def PTX_READ_NTID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
6983                                                       int_ptx_read_ntid_x>;
6984 def PTX_READ_NTID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
6985                                                       int_ptx_read_ntid_y>;
6986 def PTX_READ_NTID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
6987                                                       int_ptx_read_ntid_z>;
6988 def PTX_READ_NTID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
6989                                                       int_ptx_read_ntid_w>;
6990
// 32-bit reads of %laneid, %warpid and %nwarpid, each selected for the
// matching int_ptx_read_* intrinsic.
6991 def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
6992                                                      int_ptx_read_laneid>;
6993 def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
6994                                                      int_ptx_read_warpid>;
6995 def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
6996                                                      int_ptx_read_nwarpid>;
6997
// 32-bit reads of %ctaid.x / %ctaid.y / %ctaid.z / %ctaid.w, one instruction
// per component, each selected for the matching int_ptx_read_ctaid_*
// intrinsic.
// NOTE(review): presumably the block (CTA) index within the grid -- confirm
// against the PTX special-register documentation.
6998 def PTX_READ_CTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
6999                                                        int_ptx_read_ctaid_x>;
7000 def PTX_READ_CTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
7001                                                        int_ptx_read_ctaid_y>;
7002 def PTX_READ_CTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
7003                                                        int_ptx_read_ctaid_z>;
7004 def PTX_READ_CTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
7005                                                        int_ptx_read_ctaid_w>;
7006
// 32-bit reads of %nctaid.x / %nctaid.y / %nctaid.z / %nctaid.w, one
// instruction per component, each selected for the matching
// int_ptx_read_nctaid_* intrinsic.
// NOTE(review): presumably the grid dimensions in CTAs -- confirm against the
// PTX special-register documentation.
7007 def PTX_READ_NCTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
7008                                                         int_ptx_read_nctaid_x>;
7009 def PTX_READ_NCTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
7010                                                         int_ptx_read_nctaid_y>;
7011 def PTX_READ_NCTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
7012                                                         int_ptx_read_nctaid_z>;
7013 def PTX_READ_NCTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
7014                                                         int_ptx_read_nctaid_w>;
7015
// 32-bit reads of %smid, %nsmid and %gridid, each selected for the matching
// int_ptx_read_* intrinsic.
// NOTE(review): %gridid is read here with mov.u32; confirm that a 32-bit read
// is what the targeted PTX versions expect for this register.
7016 def PTX_READ_SMID  : PTX_READ_SPECIAL_REGISTER_R32<"smid",
7017                                                    int_ptx_read_smid>;
7018 def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
7019                                                     int_ptx_read_nsmid>;
7020 def PTX_READ_GRIDID  : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
7021                                                      int_ptx_read_gridid>;
7022
// 32-bit reads of the five %lanemask_{eq,le,lt,ge,gt} registers, each
// selected for the matching int_ptx_read_lanemask_* intrinsic.
7023 def PTX_READ_LANEMASK_EQ
7024   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
7025 def PTX_READ_LANEMASK_LE
7026   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
7027 def PTX_READ_LANEMASK_LT
7028   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
7029 def PTX_READ_LANEMASK_GE
7030   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
7031 def PTX_READ_LANEMASK_GT
7032   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
7033
// Reads of the clock registers: %clock through the 32-bit class and %clock64
// through the 64-bit class (the only user of the R64 class in this group).
7034 def PTX_READ_CLOCK
7035   : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
7036 def PTX_READ_CLOCK64
7037   : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
7038
// 32-bit reads of %pm0 through %pm3, each selected for the matching
// int_ptx_read_pm* intrinsic.
// NOTE(review): presumably the performance-monitoring counters -- confirm
// against the PTX special-register documentation.
7039 def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
7040 def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
7041 def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
7042 def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
7043
7044 // PTX Parallel Synchronization and Communication Intrinsics
7045
// Instruction with no outputs and a single i32 immediate input $i, printed as
// "bar.sync\t$i;". It is selected for int_ptx_bar_sync only when the argument
// is an immediate (the pattern matches imm:$i, not a register operand).
// NOTE(review): presumably $i is the barrier number -- confirm against the
// PTX bar.sync documentation.
7046 def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
7047                              [(int_ptx_bar_sync imm:$i)]>;